001//////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code for adherence to a set of rules. 003// Copyright (C) 2001-2016 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018//////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.api; 021 022import java.io.BufferedReader; 023import java.io.File; 024import java.io.FileInputStream; 025import java.io.FileNotFoundException; 026import java.io.IOException; 027import java.io.InputStreamReader; 028import java.io.Reader; 029import java.io.StringReader; 030import java.nio.charset.Charset; 031import java.nio.charset.CharsetDecoder; 032import java.nio.charset.CodingErrorAction; 033import java.nio.charset.UnsupportedCharsetException; 034import java.util.AbstractList; 035import java.util.ArrayList; 036import java.util.Arrays; 037import java.util.List; 038import java.util.regex.Matcher; 039import java.util.regex.Pattern; 040 041import org.apache.commons.lang3.ArrayUtils; 042 043import com.google.common.io.Closeables; 044 045/** 046 * Represents the text contents of a file of arbitrary plain text type. 047 * <p> 048 * This class will be passed to instances of class FileSetCheck by 049 * Checker. It implements a string list to ensure backwards 050 * compatibility, but can be extended in the future to allow more 051 * flexible, more powerful or more efficient handling of certain 052 * situations. 053 * </p> 054 * 055 * @author Martin von Gagern 056 */ 057public final class FileText extends AbstractList<String> { 058 059 /** 060 * The number of characters to read in one go. 061 */ 062 private static final int READ_BUFFER_SIZE = 1024; 063 064 /** 065 * Regular expression pattern matching all line terminators. 066 */ 067 private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?"); 068 069 // For now, we always keep both full text and lines array. 070 // In the long run, however, the one passed at initialization might be 071 // enough, while the other could be lazily created when requested. 072 // This would save memory but cost CPU cycles. 073 074 /** 075 * The name of the file. 076 * {@code null} if no file name is available for whatever reason. 077 */ 078 private final File file; 079 080 /** 081 * The charset used to read the file. 082 * {@code null} if the file was reconstructed from a list of lines. 083 */ 084 private final Charset charset; 085 086 /** 087 * The full text contents of the file. 088 */ 089 private final String fullText; 090 091 /** 092 * The lines of the file, without terminators. 093 */ 094 private final String[] lines; 095 096 /** 097 * The first position of each line within the full text. 098 */ 099 private int[] lineBreaks; 100 101 /** 102 * Creates a new file text representation. 103 * 104 * <p>The file will be read using the specified encoding, replacing 105 * malformed input and unmappable characters with the default 106 * replacement character. 107 * 108 * @param file the name of the file 109 * @param charsetName the encoding to use when reading the file 110 * @throws NullPointerException if the text is null 111 * @throws IOException if the file could not be read 112 */ 113 public FileText(File file, String charsetName) throws IOException { 114 this.file = file; 115 116 // We use our own decoder, to be sure we have complete control 117 // about replacements. 118 final CharsetDecoder decoder; 119 try { 120 charset = Charset.forName(charsetName); 121 decoder = charset.newDecoder(); 122 decoder.onMalformedInput(CodingErrorAction.REPLACE); 123 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 124 } 125 catch (final UnsupportedCharsetException ex) { 126 final String message = "Unsupported charset: " + charsetName; 127 throw new IllegalStateException(message, ex); 128 } 129 130 fullText = readFile(file, decoder); 131 132 // Use the BufferedReader to break down the lines as this 133 // is about 30% faster than using the 134 // LINE_TERMINATOR.split(fullText, -1) method 135 final ArrayList<String> textLines = new ArrayList<>(); 136 final BufferedReader reader = 137 new BufferedReader(new StringReader(fullText)); 138 while (true) { 139 final String line = reader.readLine(); 140 if (line == null) { 141 break; 142 } 143 textLines.add(line); 144 } 145 lines = textLines.toArray(new String[textLines.size()]); 146 } 147 148 /** 149 * Copy constructor. 150 * @param fileText to make copy of 151 */ 152 public FileText(FileText fileText) { 153 file = fileText.file; 154 charset = fileText.charset; 155 fullText = fileText.fullText; 156 lines = fileText.lines.clone(); 157 lineBreaks = ArrayUtils.clone(fileText.lineBreaks); 158 } 159 160 /** 161 * Compatibility constructor. 162 * 163 * <p>This constructor reconstructs the text of the file by joining 164 * lines with linefeed characters. This process does not restore 165 * the original line terminators and should therefore be avoided. 166 * 167 * @param file the name of the file 168 * @param lines the lines of the text, without terminators 169 * @throws NullPointerException if the lines array is null 170 */ 171 private FileText(File file, List<String> lines) { 172 final StringBuilder buf = new StringBuilder(); 173 for (final String line : lines) { 174 buf.append(line).append('\n'); 175 } 176 buf.trimToSize(); 177 178 this.file = file; 179 charset = null; 180 fullText = buf.toString(); 181 this.lines = lines.toArray(new String[lines.size()]); 182 } 183 184 /** 185 * Reads file using specific decoder and returns all its content as a String. 186 * @param inputFile File to read 187 * @param decoder Charset decoder 188 * @return File's text 189 * @throws IOException Unable to open or read the file 190 */ 191 private static String readFile(final File inputFile, final CharsetDecoder decoder) 192 throws IOException { 193 if (!inputFile.exists()) { 194 throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)"); 195 } 196 final StringBuilder buf = new StringBuilder(); 197 final FileInputStream stream = new FileInputStream(inputFile); 198 final Reader reader = new InputStreamReader(stream, decoder); 199 try { 200 final char[] chars = new char[READ_BUFFER_SIZE]; 201 while (true) { 202 final int len = reader.read(chars); 203 if (len < 0) { 204 break; 205 } 206 buf.append(chars, 0, len); 207 } 208 } 209 finally { 210 Closeables.closeQuietly(reader); 211 } 212 return buf.toString(); 213 } 214 215 /** 216 * Compatibility conversion. 217 * 218 * <p>This method can be used to convert the arguments passed to 219 * {@link FileSetCheck#process(File,List)} to a FileText 220 * object. If the list of lines already is a FileText, it is 221 * returned as is. Otherwise, a new FileText is constructed by 222 * joining the lines using line feed characters. 223 * 224 * @param file the name of the file 225 * @param lines the lines of the text, without terminators 226 * @return an object representing the denoted text file 227 */ 228 public static FileText fromLines(File file, List<String> lines) { 229 if (lines instanceof FileText) { 230 return (FileText) lines; 231 } 232 else { 233 return new FileText(file, lines); 234 } 235 } 236 237 /** 238 * Get the name of the file. 239 * @return an object containing the name of the file 240 */ 241 public File getFile() { 242 return file; 243 } 244 245 /** 246 * Get the character set which was used to read the file. 247 * Will be {@code null} for a file reconstructed from its lines. 248 * @return the charset used when the file was read 249 */ 250 public Charset getCharset() { 251 return charset; 252 } 253 254 /** 255 * Retrieve the full text of the file. 256 * @return the full text of the file 257 */ 258 public CharSequence getFullText() { 259 return fullText; 260 } 261 262 /** 263 * Returns an array of all lines. 264 * {@code text.toLinesArray()} is equivalent to 265 * {@code text.toArray(new String[text.size()])}. 266 * @return an array of all lines of the text 267 */ 268 public String[] toLinesArray() { 269 return lines.clone(); 270 } 271 272 /** 273 * Find positions of line breaks in the full text. 274 * @return an array giving the first positions of each line. 275 */ 276 private int[] findLineBreaks() { 277 if (lineBreaks == null) { 278 final int[] lineBreakPositions = new int[size() + 1]; 279 lineBreakPositions[0] = 0; 280 int lineNo = 1; 281 final Matcher matcher = LINE_TERMINATOR.matcher(fullText); 282 while (matcher.find()) { 283 lineBreakPositions[lineNo] = matcher.end(); 284 lineNo++; 285 } 286 if (lineNo < lineBreakPositions.length) { 287 lineBreakPositions[lineNo] = fullText.length(); 288 } 289 lineBreaks = lineBreakPositions; 290 } 291 return lineBreaks; 292 } 293 294 /** 295 * Determine line and column numbers in full text. 296 * @param pos the character position in the full text 297 * @return the line and column numbers of this character 298 */ 299 public LineColumn lineColumn(int pos) { 300 final int[] lineBreakPositions = findLineBreaks(); 301 int lineNo = Arrays.binarySearch(lineBreakPositions, pos); 302 if (lineNo < 0) { 303 // we have: lineNo = -(insertion point) - 1 304 // we want: lineNo = (insertion point) - 1 305 lineNo = -lineNo - 2; 306 } 307 final int startOfLine = lineBreakPositions[lineNo]; 308 final int columnNo = pos - startOfLine; 309 // now we have lineNo and columnNo, both starting at zero. 310 return new LineColumn(lineNo + 1, columnNo); 311 } 312 313 /** 314 * Retrieves a line of the text by its number. 315 * The returned line will not contain a trailing terminator. 316 * @param lineNo the number of the line to get, starting at zero 317 * @return the line with the given number 318 */ 319 @Override 320 public String get(final int lineNo) { 321 return lines[lineNo]; 322 } 323 324 /** 325 * Counts the lines of the text. 326 * @return the number of lines in the text 327 */ 328 @Override 329 public int size() { 330 return lines.length; 331 } 332 333}