001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2023 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.header;
021
022import java.io.File;
023import java.util.ArrayList;
024import java.util.BitSet;
025import java.util.List;
026import java.util.regex.Pattern;
027import java.util.regex.PatternSyntaxException;
028
029import com.puppycrawl.tools.checkstyle.StatelessCheck;
030import com.puppycrawl.tools.checkstyle.api.FileText;
031import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
032import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
033
034/**
035 * <p>
036 * Checks the header of a source file against a header that contains a
037 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html">
038 * pattern</a> for each line of the source header.
039 * </p>
040 * <p>
041 * Rationale: In some projects <a href="https://checkstyle.org/config_header.html#Header">
042 * checking against a fixed header</a> is not sufficient, e.g. the header might
043 * require a copyright line where the year information is not static.
044 * </p>
045 * <p>
046 * For example, consider the following header:
047 * </p>
048 * <pre>
049 * line  1: ^/{71}$
050 * line  2: ^// checkstyle:$
051 * line  3: ^// Checks Java source code for adherence to a set of rules\.$
052 * line  4: ^// Copyright \(C\) \d\d\d\d  Oliver Burn$
053 * line  5: ^// Last modification by \$Author.*\$$
054 * line  6: ^/{71}$
055 * line  7:
056 * line  8: ^package
057 * line  9:
058 * line 10: ^import
059 * line 11:
060 * line 12: ^/\*\*
061 * line 13: ^ \*([^/]|$)
062 * line 14: ^ \*&#47;
063 * </pre>
064 * <p>
065 * Lines 1 and 6 demonstrate a more compact notation for 71 '/' characters.
066 * Line 4 enforces that the copyright notice includes a four digit year.
 * Line 5 is an example of how to enforce revision control keywords in a file header.
 * Lines 12-14 are a template for javadoc (line 13 is that complicated in order to
 * avoid a conflict with the end of the javadoc comment). Lines 7, 9 and 11 will be
 * treated as '^$' and will forcefully expect the line to be empty.
071 * </p>
072 * <p>
073 * Different programming languages have different comment syntax rules,
074 * but all of them start a comment with a non-word character.
075 * Hence, you can often use the non-word character class to abstract away
076 * the concrete comment syntax and allow checking the header for different
077 * languages with a single header definition. For example, consider the following
078 * header specification (note that this is not the full Apache license header):
079 * </p>
080 * <pre>
081 * line 1: ^#!
082 * line 2: ^&lt;\?xml.*&gt;$
083 * line 3: ^\W*$
084 * line 4: ^\W*Copyright 2006 The Apache Software Foundation or its licensors, as applicable\.$
085 * line 5: ^\W*Licensed under the Apache License, Version 2\.0 \(the "License"\);$
086 * line 6: ^\W*$
087 * </pre>
088 * <p>
089 * Lines 1 and 2 leave room for technical header lines, e.g. the "#!/bin/sh"
090 * line in Unix shell scripts, or the XML file header of XML files.
 * Set the multiLines property to "1, 2" so these lines can be ignored for
 * file types where they do not apply. Lines 3 through 6 define the actual header content.
093 * Note how lines 2, 4 and 5 use escapes for characters that have special regexp semantics.
094 * </p>
095 * <p>
 * In default configuration, if header is not specified, the default value
 * of header is set to null and the check does not raise any violations.
098 * </p>
099 * <ul>
100 * <li>
101 * Property {@code headerFile} - Specify the name of the file containing the required header.
102 * Type is {@code java.net.URI}.
103 * Default value is {@code null}.
104 * </li>
105 * <li>
106 * Property {@code charset} - Specify the character encoding to use when reading the headerFile.
107 * Type is {@code java.lang.String}.
108 * Default value is {@code the charset property of the parent
109 * <a href="https://checkstyle.org/config.html#Checker">Checker</a> module}.
110 * </li>
111 * <li>
112 * Property {@code header} - Define the required header specified inline.
113 * Individual header lines must be separated by the string {@code "\n"}
114 * (even on platforms with a different line separator).
115 * For header lines containing {@code "\n\n"} checkstyle will
116 * forcefully expect an empty line to exist. See examples below.
117 * Regular expressions must not span multiple lines.
118 * Type is {@code java.lang.String}.
119 * Default value is {@code null}.
120 * </li>
121 * <li>
122 * Property {@code multiLines} - Specify the line numbers to repeat (zero or more times).
123 * Type is {@code int[]}.
124 * Default value is {@code ""}.
125 * </li>
126 * <li>
127 * Property {@code fileExtensions} - Specify the file type extension of files to process.
128 * Type is {@code java.lang.String[]}.
129 * Default value is {@code ""}.
130 * </li>
131 * </ul>
132 * <p>
 * To configure the check such that no violations arise
 * (default values of properties are used):
135 * </p>
136 * <pre>
137 * &lt;module name="RegexpHeader"/&gt;
138 * </pre>
139 * <p>
140 * To configure the check to use header file {@code "config/java.header"} and
141 * {@code 10} and {@code 13} multi-lines:
142 * </p>
143 * <pre>
144 * &lt;module name="RegexpHeader"&gt;
145 *   &lt;property name="headerFile" value="config/java.header"/&gt;
146 *   &lt;property name="multiLines" value="10, 13"/&gt;
147 * &lt;/module&gt;
148 * </pre>
149 * <p>
150 * To configure the check to verify that each file starts with the header
151 * </p>
152 * <pre>
153 * ^// Copyright \(C\) (\d\d\d\d -)? 2004 MyCompany$
154 * ^// All rights reserved$
155 * </pre>
156 * <p>
157 * without the need for an external header file:
158 * </p>
159 * <pre>
160 * &lt;module name="RegexpHeader"&gt;
161 *   &lt;property
162 *     name="header"
163 *     value="^// Copyright \(C\) (\d\d\d\d -)? 2004 MyCompany$
164 *       \n^// All rights reserved$"/&gt;
165 * &lt;/module&gt;
166 * </pre>
167 * <p>
168 * For regex containing {@code "\n\n"}
169 * </p>
170 * <pre>
171 * &lt;module name="RegexpHeader"&gt;
172 *   &lt;property
173 *     name="header"
174 *     value="^package .*\n\n.*"/&gt;
175 * &lt;/module&gt;
176 * </pre>
177 * <p>
178 * {@code "\n\n"} will be treated as '^$' and will forcefully expect the line
179 * to be empty. For example -
180 * </p>
181 * <pre>
182 * package com.some.package;
183 * public class ThisWillFail { }
184 * </pre>
185 * <p>
186 * would fail for the regex above. Expected -
187 * </p>
188 * <pre>
189 * package com.some.package;
190 *
191 * public class ThisWillPass { }
192 * </pre>
193 * <p>
194 * <u>Note</u>: {@code ignoreLines} property has been removed from this check to simplify it.
195 * To make some line optional use "^.*$" regexp for this line.
196 * </p>
197 * <p>
198 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker}
199 * </p>
200 * <p>
201 * Violation Message Keys:
202 * </p>
203 * <ul>
204 * <li>
205 * {@code header.mismatch}
206 * </li>
207 * <li>
208 * {@code header.missing}
209 * </li>
210 * </ul>
211 *
212 * @since 6.9
213 */
214@StatelessCheck
215public class RegexpHeaderCheck extends AbstractHeaderCheck {
216
217    /**
218     * A key is pointing to the warning message text in "messages.properties"
219     * file.
220     */
221    public static final String MSG_HEADER_MISSING = "header.missing";
222
223    /**
224     * A key is pointing to the warning message text in "messages.properties"
225     * file.
226     */
227    public static final String MSG_HEADER_MISMATCH = "header.mismatch";
228
229    /** Regex pattern for a blank line. **/
230    private static final String EMPTY_LINE_PATTERN = "^$";
231
232    /** Compiled regex pattern for a blank line. **/
233    private static final Pattern BLANK_LINE = Pattern.compile(EMPTY_LINE_PATTERN);
234
235    /** The compiled regular expressions. */
236    private final List<Pattern> headerRegexps = new ArrayList<>();
237
238    /** Specify the line numbers to repeat (zero or more times). */
239    private BitSet multiLines = new BitSet();
240
241    /**
242     * Setter to specify the line numbers to repeat (zero or more times).
243     *
244     * @param list line numbers to repeat in header.
245     */
246    public void setMultiLines(int... list) {
247        multiLines = TokenUtil.asBitSet(list);
248    }
249
250    @Override
251    protected void processFiltered(File file, FileText fileText) {
252        final int headerSize = getHeaderLines().size();
253        final int fileSize = fileText.size();
254
255        if (headerSize - multiLines.cardinality() > fileSize) {
256            log(1, MSG_HEADER_MISSING);
257        }
258        else {
259            int headerLineNo = 0;
260            int index;
261            for (index = 0; headerLineNo < headerSize && index < fileSize; index++) {
262                final String line = fileText.get(index);
263                boolean isMatch = isMatch(line, headerLineNo);
264                while (!isMatch && isMultiLine(headerLineNo)) {
265                    headerLineNo++;
266                    isMatch = headerLineNo == headerSize
267                            || isMatch(line, headerLineNo);
268                }
269                if (!isMatch) {
270                    log(index + 1, MSG_HEADER_MISMATCH, getHeaderLine(headerLineNo));
271                    break;
272                }
273                if (!isMultiLine(headerLineNo)) {
274                    headerLineNo++;
275                }
276            }
277            if (index == fileSize) {
278                // if file finished, but we have at least one non-multi-line
279                // header isn't completed
280                logFirstSinglelineLine(headerLineNo, headerSize);
281            }
282        }
283    }
284
285    /**
286     * Returns the line from the header. Where the line is blank return the regexp pattern
287     * for a blank line.
288     *
289     * @param headerLineNo header line number to return
290     * @return the line from the header
291     */
292    private String getHeaderLine(int headerLineNo) {
293        String line = getHeaderLines().get(headerLineNo);
294        if (line.isEmpty()) {
295            line = EMPTY_LINE_PATTERN;
296        }
297        return line;
298    }
299
300    /**
301     * Logs warning if any non-multiline lines left in header regexp.
302     *
303     * @param startHeaderLine header line number to start from
304     * @param headerSize whole header size
305     */
306    private void logFirstSinglelineLine(int startHeaderLine, int headerSize) {
307        for (int lineNum = startHeaderLine; lineNum < headerSize; lineNum++) {
308            if (!isMultiLine(lineNum)) {
309                log(1, MSG_HEADER_MISSING);
310                break;
311            }
312        }
313    }
314
315    /**
316     * Checks if a code line matches the required header line.
317     *
318     * @param line the code line
319     * @param headerLineNo the header line number.
320     * @return true if and only if the line matches the required header line.
321     */
322    private boolean isMatch(String line, int headerLineNo) {
323        return headerRegexps.get(headerLineNo).matcher(line).find();
324    }
325
326    /**
327     * Returns true if line is multiline header lines or false.
328     *
329     * @param lineNo a line number
330     * @return if {@code lineNo} is one of the repeat header lines.
331     */
332    private boolean isMultiLine(int lineNo) {
333        return multiLines.get(lineNo + 1);
334    }
335
336    @Override
337    protected void postProcessHeaderLines() {
338        final List<String> headerLines = getHeaderLines();
339        for (String line : headerLines) {
340            try {
341                if (line.isEmpty()) {
342                    headerRegexps.add(BLANK_LINE);
343                }
344                else {
345                    headerRegexps.add(Pattern.compile(line));
346                }
347            }
348            catch (final PatternSyntaxException ex) {
349                throw new IllegalArgumentException("line "
350                        + (headerRegexps.size() + 1)
351                        + " in header specification"
352                        + " is not a regular expression", ex);
353            }
354        }
355    }
356
357    /**
358     * Setter to define the required header specified inline.
359     * Individual header lines must be separated by the string {@code "\n"}
360     * (even on platforms with a different line separator).
361     * For header lines containing {@code "\n\n"} checkstyle will forcefully
362     * expect an empty line to exist. See examples below.
363     * Regular expressions must not span multiple lines.
364     *
365     * @param header the header value to validate and set (in that order)
366     */
367    @Override
368    public void setHeader(String header) {
369        if (!CommonUtil.isBlank(header)) {
370            if (!CommonUtil.isPatternValid(header)) {
371                throw new IllegalArgumentException("Unable to parse format: " + header);
372            }
373            super.setHeader(header);
374        }
375    }
376
377}