001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2021 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.regexp;
021
022import java.io.File;
023import java.util.regex.Pattern;
024
025import com.puppycrawl.tools.checkstyle.StatelessCheck;
026import com.puppycrawl.tools.checkstyle.api.AbstractFileSetCheck;
027import com.puppycrawl.tools.checkstyle.api.FileText;
028
029/**
030 * <p>
031 * Checks that a specified pattern matches across multiple lines in any file type.
032 * </p>
033 * <p>
 * Rationale: This check can be used when the regular expression can span multiple lines.
035 * </p>
036 * <ul>
037 * <li>
038 * Property {@code format} - Specify the format of the regular expression to match.
039 * Type is {@code java.lang.String}.
040 * Default value is {@code "$."}.
041 * </li>
042 * <li>
043 * Property {@code message} - Specify the message which is used to notify about
044 * violations, if empty then default (hard-coded) message is used.
045 * Type is {@code java.lang.String}.
046 * Default value is {@code null}.
047 * </li>
048 * <li>
049 * Property {@code ignoreCase} - Control whether to ignore case when searching.
050 * Type is {@code boolean}.
051 * Default value is {@code false}.
052 * </li>
053 * <li>
054 * Property {@code minimum} - Specify the minimum number of matches required in each file.
055 * Type is {@code int}.
056 * Default value is {@code 0}.
057 * </li>
058 * <li>
 * Property {@code maximum} - Specify the maximum number of matches allowed in each file.
060 * Type is {@code int}.
061 * Default value is {@code 0}.
062 * </li>
063 * <li>
064 * Property {@code matchAcrossLines} - Control whether to match expressions
065 * across multiple lines.
066 * Type is {@code boolean}.
067 * Default value is {@code false}.
068 * </li>
069 * <li>
070 * Property {@code fileExtensions} - Specify the file type extension of files to process.
071 * Type is {@code java.lang.String[]}.
072 * Default value is {@code ""}.
073 * </li>
074 * </ul>
075 * <p>
 * To run the check with its default configuration (no matches will be found):
077 * </p>
078 * <pre>
079 * &lt;module name=&quot;RegexpMultiline&quot;/&gt;
080 * </pre>
081 * <p>Example: </p>
082 * <pre>
083 * void method() {
084 *   int i = 5; // OK
085 *   System.out.println(i); // OK
086 * }
087 * </pre>
088 * <p>
089 * To configure the check to find calls to print to the console:
090 * </p>
091 * <pre>
092 * &lt;module name="RegexpMultiline"&gt;
093 *   &lt;property name="format" value="System\.(out)|(err)\.print(ln)?\("/&gt;
094 * &lt;/module&gt;
095 * </pre>
096 * <p>
097 * Example:
098 * </p>
099 * <pre>
100 * void method() {
101 *   System.out.print("Example");   // violation
102 *   System.err.println("Example"); // violation
103 *   System.out.print
104 *     ("Example");                 // violation
105 *   System.err.println
106 *     ("Example");          // OK
107 *   System
108 *   .out.print("Example");  // OK
109 *   System
110 *   .err.println("Example");       // violation
111 *   System.
112 *   out.print("Example");   // OK
113 *   System.
114 *   err.println("Example");        // violation
115 * }
116 * </pre>
117 * <p>
118 * To configure the check to match text that spans multiple lines,
119 * like normal code in a Java file:
120 * </p>
121 * <pre>
122 * &lt;module name="RegexpMultiline"&gt;
123 *   &lt;property name="matchAcrossLines" value="true"/&gt;
124 *   &lt;property name="format" value="System\.out.*?print\("/&gt;
125 * &lt;/module&gt;
126 * </pre>
127 * <p>
128 * Example:
129 * </p>
130 * <pre>
131 * void method() {
132 *   System.out.print("Example");  // violation
133 *   System.err.println("Example");
134 *   System.out.print              // violation
135 *     ("Example");
136 *   System.err.println
137 *     ("Example");
138 *   System
139 *   .out.print("Example");
140 *   System
141 *   .err.println("Example");
142 *   System.
143 *   out.print("Example");
144 *   System.
145 *   err.println("Example");
146 * }
147 * </pre>
148 * <p>
 * Note: Beware of greedy regular expressions when matching across lines.
 * A greedy {@code .*} will match as much as possible and not produce multiple violations
 * in the file if multiple groups of lines could match the expression. To prevent
 * an expression being too greedy, avoid overusing matching all text or allow it
 * to be optional, like {@code .*?}, as the above example does. Because the example
 * expression is not greedy, multiple violations can be found in the same file.
155 * </p>
156 * <p>
157 * To configure the check to match a maximum of three test strings:
158 * </p>
159 * <pre>
160 * &lt;module name=&quot;RegexpMultiline&quot;&gt;
161 *   &lt;property name=&quot;format&quot; value=&quot;Test #[0-9]+:[A-Za-z ]+&quot;/&gt;
162 *   &lt;property name=&quot;ignoreCase&quot; value=&quot;true&quot;/&gt;
163 *   &lt;property name=&quot;maximum&quot; value=&quot;3&quot;/&gt;
164 * &lt;/module&gt;
165 * </pre>
166 * <p>
167 * Example:
168 * </p>
169 * <pre>
170 * void method() {
171 *   System.out.println("Test #1: this is a test string"); // OK
172 *   System.out.println("TeSt #2: This is a test string"); // OK
173 *   System.out.println("TEST #3: This is a test string"); // OK
174 *   int i = 5;
175 *   System.out.println("Value of i: " + i);
176 *   System.out.println("Test #4: This is a test string"); // violation
177 *   System.out.println("TEst #5: This is a test string"); // violation
178 * }
179 * </pre>
180 * <p>
181 * To configure the check to match a minimum of two test strings:
182 * </p>
183 * <pre>
184 * &lt;module name=&quot;RegexpMultiline&quot;&gt;
185 *   &lt;property name=&quot;format&quot; value=&quot;Test #[0-9]+:[A-Za-z ]+&quot;/&gt;
186 *   &lt;property name=&quot;minimum&quot; value=&quot;2&quot;/&gt;
187 * &lt;/module&gt;
188 * </pre>
189 * <p>
190 * Example:
191 * </p>
192 * <pre>
193 * void method() {
194 *   System.out.println("Test #1: this is a test string"); // violation
195 *   System.out.println("TEST #2: This is a test string"); // OK, "ignoreCase" is false by default
196 *   int i = 5;
197 *   System.out.println("Value of i: " + i);
198 *   System.out.println("Test #3: This is a test string"); // violation
199 *   System.out.println("Test #4: This is a test string"); // violation
200 * }
201 * </pre>
202 * <p>
203 * To configure the check to restrict an empty file:
204 * </p>
205 * <pre>
206 * &lt;module name=&quot;RegexpMultiline&quot;&gt;
207 *     &lt;property name=&quot;format&quot; value=&quot;^\s*$&quot; /&gt;
208 *     &lt;property name=&quot;matchAcrossLines&quot; value=&quot;true&quot; /&gt;
209 *     &lt;property name=&quot;message&quot; value=&quot;Empty file is not allowed&quot; /&gt;
210 * &lt;/module&gt;
211 * </pre>
212 * <p>
213 * Example of violation from the above config:
214 * </p>
215 * <pre>
216 * /var/tmp$ cat -n Test.java
217 * 1
218 * 2
219 * 3
220 * 4
221 * </pre>
222 * <p>Result:</p>
223 * <pre>
224 * /var/tmp/Test.java // violation, a file must not be empty.
225 * </pre>
226 * <p>
227 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker}
228 * </p>
229 * <p>
230 * Violation Message Keys:
231 * </p>
232 * <ul>
233 * <li>
234 * {@code regexp.StackOverflowError}
235 * </li>
236 * <li>
237 * {@code regexp.empty}
238 * </li>
239 * <li>
240 * {@code regexp.exceeded}
241 * </li>
242 * <li>
243 * {@code regexp.minimum}
244 * </li>
245 * </ul>
246 *
247 * @since 5.0
248 */
249@StatelessCheck
250public class RegexpMultilineCheck extends AbstractFileSetCheck {
251
252    /** Specify the format of the regular expression to match. */
253    private String format = "$.";
254    /**
255     * Specify the message which is used to notify about violations,
256     * if empty then default (hard-coded) message is used.
257     */
258    private String message;
259    /** Specify the minimum number of matches required in each file. */
260    private int minimum;
261    /** Specify the maximum number of matches required in each file. */
262    private int maximum;
263    /** Control whether to ignore case when searching. */
264    private boolean ignoreCase;
265    /** Control whether to match expressions across multiple lines. */
266    private boolean matchAcrossLines;
267
268    /** The detector to use. */
269    private MultilineDetector detector;
270
271    @Override
272    public void beginProcessing(String charset) {
273        final DetectorOptions options = DetectorOptions.newBuilder()
274            .reporter(this)
275            .compileFlags(getRegexCompileFlags())
276            .format(format)
277            .message(message)
278            .minimum(minimum)
279            .maximum(maximum)
280            .ignoreCase(ignoreCase)
281            .build();
282        detector = new MultilineDetector(options);
283    }
284
285    @Override
286    protected void processFiltered(File file, FileText fileText) {
287        detector.processLines(fileText);
288    }
289
290    /**
291     * Retrieves the compile flags for the regular expression being built based
292     * on {@code matchAcrossLines}.
293     *
294     * @return The compile flags.
295     */
296    private int getRegexCompileFlags() {
297        final int result;
298
299        if (matchAcrossLines) {
300            result = Pattern.DOTALL;
301        }
302        else {
303            result = Pattern.MULTILINE;
304        }
305
306        return result;
307    }
308
309    /**
310     * Setter to specify the format of the regular expression to match.
311     *
312     * @param format the format of the regular expression to match.
313     */
314    public void setFormat(String format) {
315        this.format = format;
316    }
317
318    /**
319     * Setter to specify the message which is used to notify about violations,
320     * if empty then default (hard-coded) message is used.
321     *
322     * @param message the message to report for a match.
323     */
324    public void setMessage(String message) {
325        this.message = message;
326    }
327
328    /**
329     * Setter to specify the minimum number of matches required in each file.
330     *
331     * @param minimum the minimum number of matches required in each file.
332     */
333    public void setMinimum(int minimum) {
334        this.minimum = minimum;
335    }
336
337    /**
338     * Setter to specify the maximum number of matches required in each file.
339     *
340     * @param maximum the maximum number of matches required in each file.
341     */
342    public void setMaximum(int maximum) {
343        this.maximum = maximum;
344    }
345
346    /**
347     * Setter to control whether to ignore case when searching.
348     *
349     * @param ignoreCase whether to ignore case when searching.
350     */
351    public void setIgnoreCase(boolean ignoreCase) {
352        this.ignoreCase = ignoreCase;
353    }
354
355    /**
356     * Setter to control whether to match expressions across multiple lines.
357     *
358     * @param matchAcrossLines whether to match expressions across multiple lines.
359     */
360    public void setMatchAcrossLines(boolean matchAcrossLines) {
361        this.matchAcrossLines = matchAcrossLines;
362    }
363
364}