001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2023 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.header; 021 022import java.io.File; 023import java.util.ArrayList; 024import java.util.BitSet; 025import java.util.List; 026import java.util.regex.Pattern; 027import java.util.regex.PatternSyntaxException; 028 029import com.puppycrawl.tools.checkstyle.StatelessCheck; 030import com.puppycrawl.tools.checkstyle.api.FileText; 031import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 032import com.puppycrawl.tools.checkstyle.utils.TokenUtil; 033 034/** 035 * <p> 036 * Checks the header of a source file against a header that contains a 037 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html"> 038 * pattern</a> for each line of the source header. 039 * </p> 040 * <p> 041 * Rationale: In some projects <a href="https://checkstyle.org/config_header.html#Header"> 042 * checking against a fixed header</a> is not sufficient, e.g. the header might 043 * require a copyright line where the year information is not static. 044 * </p> 045 * <p> 046 * For example, consider the following header: 047 * </p> 048 * <pre> 049 * line 1: ^/{71}$ 050 * line 2: ^// checkstyle:$ 051 * line 3: ^// Checks Java source code for adherence to a set of rules\.$ 052 * line 4: ^// Copyright \(C\) \d\d\d\d Oliver Burn$ 053 * line 5: ^// Last modification by \$Author.*\$$ 054 * line 6: ^/{71}$ 055 * line 7: 056 * line 8: ^package 057 * line 9: 058 * line 10: ^import 059 * line 11: 060 * line 12: ^/\*\* 061 * line 13: ^ \*([^/]|$) 062 * line 14: ^ \*/ 063 * </pre> 064 * <p> 065 * Lines 1 and 6 demonstrate a more compact notation for 71 '/' characters. 066 * Line 4 enforces that the copyright notice includes a four digit year. 067 * Line 5 is an example how to enforce revision control keywords in a file header. 068 * Lines 12-14 is a template for javadoc (line 13 is so complicated to remove 069 * conflict with and of javadoc comment). Lines 7, 9 and 11 will be treated 070 * as '^$' and will forcefully expect the line to be empty. 071 * </p> 072 * <p> 073 * Different programming languages have different comment syntax rules, 074 * but all of them start a comment with a non-word character. 075 * Hence, you can often use the non-word character class to abstract away 076 * the concrete comment syntax and allow checking the header for different 077 * languages with a single header definition. For example, consider the following 078 * header specification (note that this is not the full Apache license header): 079 * </p> 080 * <pre> 081 * line 1: ^#! 082 * line 2: ^<\?xml.*>$ 083 * line 3: ^\W*$ 084 * line 4: ^\W*Copyright 2006 The Apache Software Foundation or its licensors, as applicable\.$ 085 * line 5: ^\W*Licensed under the Apache License, Version 2\.0 \(the "License"\);$ 086 * line 6: ^\W*$ 087 * </pre> 088 * <p> 089 * Lines 1 and 2 leave room for technical header lines, e.g. the "#!/bin/sh" 090 * line in Unix shell scripts, or the XML file header of XML files. 091 * Set the multiline property to "1, 2" so these lines can be ignored for 092 * file types where they do no apply. Lines 3 through 6 define the actual header content. 093 * Note how lines 2, 4 and 5 use escapes for characters that have special regexp semantics. 094 * </p> 095 * <p> 096 * In default configuration, if header is not specified, the default value 097 * of header is set to null and the check does not rise any violations. 098 * </p> 099 * <ul> 100 * <li> 101 * Property {@code headerFile} - Specify the name of the file containing the required header. 102 * Type is {@code java.net.URI}. 103 * Default value is {@code null}. 104 * </li> 105 * <li> 106 * Property {@code charset} - Specify the character encoding to use when reading the headerFile. 107 * Type is {@code java.lang.String}. 108 * Default value is {@code the charset property of the parent 109 * <a href="https://checkstyle.org/config.html#Checker">Checker</a> module}. 110 * </li> 111 * <li> 112 * Property {@code header} - Define the required header specified inline. 113 * Individual header lines must be separated by the string {@code "\n"} 114 * (even on platforms with a different line separator). 115 * For header lines containing {@code "\n\n"} checkstyle will 116 * forcefully expect an empty line to exist. See examples below. 117 * Regular expressions must not span multiple lines. 118 * Type is {@code java.lang.String}. 119 * Default value is {@code null}. 120 * </li> 121 * <li> 122 * Property {@code multiLines} - Specify the line numbers to repeat (zero or more times). 123 * Type is {@code int[]}. 124 * Default value is {@code ""}. 125 * </li> 126 * <li> 127 * Property {@code fileExtensions} - Specify the file type extension of files to process. 128 * Type is {@code java.lang.String[]}. 129 * Default value is {@code ""}. 130 * </li> 131 * </ul> 132 * <p> 133 * To configure the check such that no violations arise. 134 * Default values of properties are used. 135 * </p> 136 * <pre> 137 * <module name="RegexpHeader"/> 138 * </pre> 139 * <p> 140 * To configure the check to use header file {@code "config/java.header"} and 141 * {@code 10} and {@code 13} multi-lines: 142 * </p> 143 * <pre> 144 * <module name="RegexpHeader"> 145 * <property name="headerFile" value="config/java.header"/> 146 * <property name="multiLines" value="10, 13"/> 147 * </module> 148 * </pre> 149 * <p> 150 * To configure the check to verify that each file starts with the header 151 * </p> 152 * <pre> 153 * ^// Copyright \(C\) (\d\d\d\d -)? 2004 MyCompany$ 154 * ^// All rights reserved$ 155 * </pre> 156 * <p> 157 * without the need for an external header file: 158 * </p> 159 * <pre> 160 * <module name="RegexpHeader"> 161 * <property 162 * name="header" 163 * value="^// Copyright \(C\) (\d\d\d\d -)? 2004 MyCompany$ 164 * \n^// All rights reserved$"/> 165 * </module> 166 * </pre> 167 * <p> 168 * For regex containing {@code "\n\n"} 169 * </p> 170 * <pre> 171 * <module name="RegexpHeader"> 172 * <property 173 * name="header" 174 * value="^package .*\n\n.*"/> 175 * </module> 176 * </pre> 177 * <p> 178 * {@code "\n\n"} will be treated as '^$' and will forcefully expect the line 179 * to be empty. For example - 180 * </p> 181 * <pre> 182 * package com.some.package; 183 * public class ThisWillFail { } 184 * </pre> 185 * <p> 186 * would fail for the regex above. Expected - 187 * </p> 188 * <pre> 189 * package com.some.package; 190 * 191 * public class ThisWillPass { } 192 * </pre> 193 * <p> 194 * <u>Note</u>: {@code ignoreLines} property has been removed from this check to simplify it. 195 * To make some line optional use "^.*$" regexp for this line. 196 * </p> 197 * <p> 198 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker} 199 * </p> 200 * <p> 201 * Violation Message Keys: 202 * </p> 203 * <ul> 204 * <li> 205 * {@code header.mismatch} 206 * </li> 207 * <li> 208 * {@code header.missing} 209 * </li> 210 * </ul> 211 * 212 * @since 6.9 213 */ 214@StatelessCheck 215public class RegexpHeaderCheck extends AbstractHeaderCheck { 216 217 /** 218 * A key is pointing to the warning message text in "messages.properties" 219 * file. 220 */ 221 public static final String MSG_HEADER_MISSING = "header.missing"; 222 223 /** 224 * A key is pointing to the warning message text in "messages.properties" 225 * file. 226 */ 227 public static final String MSG_HEADER_MISMATCH = "header.mismatch"; 228 229 /** Regex pattern for a blank line. **/ 230 private static final String EMPTY_LINE_PATTERN = "^$"; 231 232 /** Compiled regex pattern for a blank line. **/ 233 private static final Pattern BLANK_LINE = Pattern.compile(EMPTY_LINE_PATTERN); 234 235 /** The compiled regular expressions. */ 236 private final List<Pattern> headerRegexps = new ArrayList<>(); 237 238 /** Specify the line numbers to repeat (zero or more times). */ 239 private BitSet multiLines = new BitSet(); 240 241 /** 242 * Setter to specify the line numbers to repeat (zero or more times). 243 * 244 * @param list line numbers to repeat in header. 245 */ 246 public void setMultiLines(int... list) { 247 multiLines = TokenUtil.asBitSet(list); 248 } 249 250 @Override 251 protected void processFiltered(File file, FileText fileText) { 252 final int headerSize = getHeaderLines().size(); 253 final int fileSize = fileText.size(); 254 255 if (headerSize - multiLines.cardinality() > fileSize) { 256 log(1, MSG_HEADER_MISSING); 257 } 258 else { 259 int headerLineNo = 0; 260 int index; 261 for (index = 0; headerLineNo < headerSize && index < fileSize; index++) { 262 final String line = fileText.get(index); 263 boolean isMatch = isMatch(line, headerLineNo); 264 while (!isMatch && isMultiLine(headerLineNo)) { 265 headerLineNo++; 266 isMatch = headerLineNo == headerSize 267 || isMatch(line, headerLineNo); 268 } 269 if (!isMatch) { 270 log(index + 1, MSG_HEADER_MISMATCH, getHeaderLine(headerLineNo)); 271 break; 272 } 273 if (!isMultiLine(headerLineNo)) { 274 headerLineNo++; 275 } 276 } 277 if (index == fileSize) { 278 // if file finished, but we have at least one non-multi-line 279 // header isn't completed 280 logFirstSinglelineLine(headerLineNo, headerSize); 281 } 282 } 283 } 284 285 /** 286 * Returns the line from the header. Where the line is blank return the regexp pattern 287 * for a blank line. 288 * 289 * @param headerLineNo header line number to return 290 * @return the line from the header 291 */ 292 private String getHeaderLine(int headerLineNo) { 293 String line = getHeaderLines().get(headerLineNo); 294 if (line.isEmpty()) { 295 line = EMPTY_LINE_PATTERN; 296 } 297 return line; 298 } 299 300 /** 301 * Logs warning if any non-multiline lines left in header regexp. 302 * 303 * @param startHeaderLine header line number to start from 304 * @param headerSize whole header size 305 */ 306 private void logFirstSinglelineLine(int startHeaderLine, int headerSize) { 307 for (int lineNum = startHeaderLine; lineNum < headerSize; lineNum++) { 308 if (!isMultiLine(lineNum)) { 309 log(1, MSG_HEADER_MISSING); 310 break; 311 } 312 } 313 } 314 315 /** 316 * Checks if a code line matches the required header line. 317 * 318 * @param line the code line 319 * @param headerLineNo the header line number. 320 * @return true if and only if the line matches the required header line. 321 */ 322 private boolean isMatch(String line, int headerLineNo) { 323 return headerRegexps.get(headerLineNo).matcher(line).find(); 324 } 325 326 /** 327 * Returns true if line is multiline header lines or false. 328 * 329 * @param lineNo a line number 330 * @return if {@code lineNo} is one of the repeat header lines. 331 */ 332 private boolean isMultiLine(int lineNo) { 333 return multiLines.get(lineNo + 1); 334 } 335 336 @Override 337 protected void postProcessHeaderLines() { 338 final List<String> headerLines = getHeaderLines(); 339 for (String line : headerLines) { 340 try { 341 if (line.isEmpty()) { 342 headerRegexps.add(BLANK_LINE); 343 } 344 else { 345 headerRegexps.add(Pattern.compile(line)); 346 } 347 } 348 catch (final PatternSyntaxException ex) { 349 throw new IllegalArgumentException("line " 350 + (headerRegexps.size() + 1) 351 + " in header specification" 352 + " is not a regular expression", ex); 353 } 354 } 355 } 356 357 /** 358 * Setter to define the required header specified inline. 359 * Individual header lines must be separated by the string {@code "\n"} 360 * (even on platforms with a different line separator). 361 * For header lines containing {@code "\n\n"} checkstyle will forcefully 362 * expect an empty line to exist. See examples below. 363 * Regular expressions must not span multiple lines. 364 * 365 * @param header the header value to validate and set (in that order) 366 */ 367 @Override 368 public void setHeader(String header) { 369 if (!CommonUtil.isBlank(header)) { 370 if (!CommonUtil.isPatternValid(header)) { 371 throw new IllegalArgumentException("Unable to parse format: " + header); 372 } 373 super.setHeader(header); 374 } 375 } 376 377}