001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2016 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.api;
021
022import java.io.BufferedReader;
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.FileNotFoundException;
026import java.io.IOException;
027import java.io.InputStreamReader;
028import java.io.Reader;
029import java.io.StringReader;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.CodingErrorAction;
033import java.nio.charset.UnsupportedCharsetException;
034import java.util.AbstractList;
035import java.util.ArrayList;
036import java.util.Arrays;
037import java.util.List;
038import java.util.regex.Matcher;
039import java.util.regex.Pattern;
040
041import org.apache.commons.lang3.ArrayUtils;
042
043import com.google.common.io.Closeables;
044
045/**
046 * Represents the text contents of a file of arbitrary plain text type.
047 * <p>
048 * This class will be passed to instances of class FileSetCheck by
049 * Checker. It implements a string list to ensure backwards
050 * compatibility, but can be extended in the future to allow more
051 * flexible, more powerful or more efficient handling of certain
052 * situations.
053 * </p>
054 *
055 * @author Martin von Gagern
056 */
057public final class FileText extends AbstractList<String> {
058
059    /**
060     * The number of characters to read in one go.
061     */
062    private static final int READ_BUFFER_SIZE = 1024;
063
064    /**
065     * Regular expression pattern matching all line terminators.
066     */
067    private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
068
069    // For now, we always keep both full text and lines array.
070    // In the long run, however, the one passed at initialization might be
071    // enough, while the other could be lazily created when requested.
072    // This would save memory but cost CPU cycles.
073
074    /**
075     * The name of the file.
076     * {@code null} if no file name is available for whatever reason.
077     */
078    private final File file;
079
080    /**
081     * The charset used to read the file.
082     * {@code null} if the file was reconstructed from a list of lines.
083     */
084    private final Charset charset;
085
086    /**
087     * The full text contents of the file.
088     */
089    private final String fullText;
090
091    /**
092     * The lines of the file, without terminators.
093     */
094    private final String[] lines;
095
096    /**
097     * The first position of each line within the full text.
098     */
099    private int[] lineBreaks;
100
101    /**
102     * Creates a new file text representation.
103     *
104     * <p>The file will be read using the specified encoding, replacing
105     * malformed input and unmappable characters with the default
106     * replacement character.
107     *
108     * @param file the name of the file
109     * @param charsetName the encoding to use when reading the file
110     * @throws NullPointerException if the text is null
111     * @throws IOException if the file could not be read
112     */
113    public FileText(File file, String charsetName) throws IOException {
114        this.file = file;
115
116        // We use our own decoder, to be sure we have complete control
117        // about replacements.
118        final CharsetDecoder decoder;
119        try {
120            charset = Charset.forName(charsetName);
121            decoder = charset.newDecoder();
122            decoder.onMalformedInput(CodingErrorAction.REPLACE);
123            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
124        }
125        catch (final UnsupportedCharsetException ex) {
126            final String message = "Unsupported charset: " + charsetName;
127            throw new IllegalStateException(message, ex);
128        }
129
130        fullText = readFile(file, decoder);
131
132        // Use the BufferedReader to break down the lines as this
133        // is about 30% faster than using the
134        // LINE_TERMINATOR.split(fullText, -1) method
135        final ArrayList<String> textLines = new ArrayList<>();
136        final BufferedReader reader =
137            new BufferedReader(new StringReader(fullText));
138        while (true) {
139            final String line = reader.readLine();
140            if (line == null) {
141                break;
142            }
143            textLines.add(line);
144        }
145        lines = textLines.toArray(new String[textLines.size()]);
146    }
147
148    /**
149     * Copy constructor.
150     * @param fileText to make copy of
151     */
152    public FileText(FileText fileText) {
153        file = fileText.file;
154        charset = fileText.charset;
155        fullText = fileText.fullText;
156        lines = fileText.lines.clone();
157        lineBreaks = ArrayUtils.clone(fileText.lineBreaks);
158    }
159
160    /**
161     * Compatibility constructor.
162     *
163     * <p>This constructor reconstructs the text of the file by joining
164     * lines with linefeed characters. This process does not restore
165     * the original line terminators and should therefore be avoided.
166     *
167     * @param file the name of the file
168     * @param lines the lines of the text, without terminators
169     * @throws NullPointerException if the lines array is null
170     */
171    private FileText(File file, List<String> lines) {
172        final StringBuilder buf = new StringBuilder();
173        for (final String line : lines) {
174            buf.append(line).append('\n');
175        }
176        buf.trimToSize();
177
178        this.file = file;
179        charset = null;
180        fullText = buf.toString();
181        this.lines = lines.toArray(new String[lines.size()]);
182    }
183
184    /**
185     * Reads file using specific decoder and returns all its content as a String.
186     * @param inputFile File to read
187     * @param decoder Charset decoder
188     * @return File's text
189     * @throws IOException Unable to open or read the file
190     */
191    private static String readFile(final File inputFile, final CharsetDecoder decoder)
192            throws IOException {
193        if (!inputFile.exists()) {
194            throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
195        }
196        final StringBuilder buf = new StringBuilder();
197        final FileInputStream stream = new FileInputStream(inputFile);
198        final Reader reader = new InputStreamReader(stream, decoder);
199        try {
200            final char[] chars = new char[READ_BUFFER_SIZE];
201            while (true) {
202                final int len = reader.read(chars);
203                if (len < 0) {
204                    break;
205                }
206                buf.append(chars, 0, len);
207            }
208        }
209        finally {
210            Closeables.closeQuietly(reader);
211        }
212        return buf.toString();
213    }
214
215    /**
216     * Compatibility conversion.
217     *
218     * <p>This method can be used to convert the arguments passed to
219     * {@link FileSetCheck#process(File,List)} to a FileText
220     * object. If the list of lines already is a FileText, it is
221     * returned as is. Otherwise, a new FileText is constructed by
222     * joining the lines using line feed characters.
223     *
224     * @param file the name of the file
225     * @param lines the lines of the text, without terminators
226     * @return an object representing the denoted text file
227     */
228    public static FileText fromLines(File file, List<String> lines) {
229        if (lines instanceof FileText) {
230            return (FileText) lines;
231        }
232        else {
233            return new FileText(file, lines);
234        }
235    }
236
237    /**
238     * Get the name of the file.
239     * @return an object containing the name of the file
240     */
241    public File getFile() {
242        return file;
243    }
244
245    /**
246     * Get the character set which was used to read the file.
247     * Will be {@code null} for a file reconstructed from its lines.
248     * @return the charset used when the file was read
249     */
250    public Charset getCharset() {
251        return charset;
252    }
253
254    /**
255     * Retrieve the full text of the file.
256     * @return the full text of the file
257     */
258    public CharSequence getFullText() {
259        return fullText;
260    }
261
262    /**
263     * Returns an array of all lines.
264     * {@code text.toLinesArray()} is equivalent to
265     * {@code text.toArray(new String[text.size()])}.
266     * @return an array of all lines of the text
267     */
268    public String[] toLinesArray() {
269        return lines.clone();
270    }
271
272    /**
273     * Find positions of line breaks in the full text.
274     * @return an array giving the first positions of each line.
275     */
276    private int[] findLineBreaks() {
277        if (lineBreaks == null) {
278            final int[] lineBreakPositions = new int[size() + 1];
279            lineBreakPositions[0] = 0;
280            int lineNo = 1;
281            final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
282            while (matcher.find()) {
283                lineBreakPositions[lineNo] = matcher.end();
284                lineNo++;
285            }
286            if (lineNo < lineBreakPositions.length) {
287                lineBreakPositions[lineNo] = fullText.length();
288            }
289            lineBreaks = lineBreakPositions;
290        }
291        return lineBreaks;
292    }
293
294    /**
295     * Determine line and column numbers in full text.
296     * @param pos the character position in the full text
297     * @return the line and column numbers of this character
298     */
299    public LineColumn lineColumn(int pos) {
300        final int[] lineBreakPositions = findLineBreaks();
301        int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
302        if (lineNo < 0) {
303            // we have: lineNo = -(insertion point) - 1
304            // we want: lineNo =  (insertion point) - 1
305            lineNo = -lineNo - 2;
306        }
307        final int startOfLine = lineBreakPositions[lineNo];
308        final int columnNo = pos - startOfLine;
309        // now we have lineNo and columnNo, both starting at zero.
310        return new LineColumn(lineNo + 1, columnNo);
311    }
312
313    /**
314     * Retrieves a line of the text by its number.
315     * The returned line will not contain a trailing terminator.
316     * @param lineNo the number of the line to get, starting at zero
317     * @return the line with the given number
318     */
319    @Override
320    public String get(final int lineNo) {
321        return lines[lineNo];
322    }
323
324    /**
325     * Counts the lines of the text.
326     * @return the number of lines in the text
327     */
328    @Override
329    public int size() {
330        return lines.length;
331    }
332
333}