001//////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code for adherence to a set of rules. 003// Copyright (C) 2001-2020 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018//////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.coding; 021 022import java.util.ArrayList; 023import java.util.BitSet; 024import java.util.HashMap; 025import java.util.List; 026import java.util.Map; 027import java.util.regex.Pattern; 028 029import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 030import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 031import com.puppycrawl.tools.checkstyle.api.DetailAST; 032import com.puppycrawl.tools.checkstyle.api.TokenTypes; 033import com.puppycrawl.tools.checkstyle.utils.CheckUtil; 034import com.puppycrawl.tools.checkstyle.utils.TokenUtil; 035 036/** 037 * <p> 038 * Checks for multiple occurrences of the same string literal within a single file. 039 * </p> 040 * <p> 041 * Rationale: Code duplication makes maintenance more difficult, so it can be better 042 * to replace the multiple occurrences with a constant. 043 * </p> 044 * <ul> 045 * <li> 046 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences 047 * to allow without generating a warning. 048 * Type is {@code int}. 049 * Default value is {@code 1}. 050 * </li> 051 * <li> 052 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks). 053 * Type is {@code java.util.regex.Pattern}. 054 * Default value is {@code "^""$"}. 055 * </li> 056 * <li> 057 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate 058 * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to 059 * exclude syntactical contexts like annotations or static initializers from the check. 060 * Type is {@code java.lang.String[]}. 061 * Validation type is {@code tokenTypesSet}. 062 * Default value is {@code ANNOTATION}. 063 * </li> 064 * </ul> 065 * <p> 066 * To configure the check: 067 * </p> 068 * <pre> 069 * <module name="MultipleStringLiterals"/> 070 * </pre> 071 * <p> 072 * To configure the check so that it allows two occurrences of each string: 073 * </p> 074 * <pre> 075 * <module name="MultipleStringLiterals"> 076 * <property name="allowedDuplicates" value="2"/> 077 * </module> 078 * </pre> 079 * <p> 080 * To configure the check so that it ignores ", " and empty strings: 081 * </p> 082 * <pre> 083 * <module name="MultipleStringLiterals"> 084 * <property name="ignoreStringsRegexp" 085 * value='^(("")|(", "))$'/> 086 * </module> 087 * </pre> 088 * <p> 089 * To configure the check so that it flags duplicate strings in all syntactical contexts, 090 * even in annotations like {@code @SuppressWarnings("unchecked")}: 091 * </p> 092 * <pre> 093 * <module name="MultipleStringLiterals"> 094 * <property name="ignoreOccurrenceContext" value=""/> 095 * </module> 096 * </pre> 097 * <p> 098 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker} 099 * </p> 100 * <p> 101 * Violation Message Keys: 102 * </p> 103 * <ul> 104 * <li> 105 * {@code multiple.string.literal} 106 * </li> 107 * </ul> 108 * 109 * @since 3.5 110 */ 111@FileStatefulCheck 112public class MultipleStringLiteralsCheck extends AbstractCheck { 113 114 /** 115 * A key is pointing to the warning message text in "messages.properties" 116 * file. 117 */ 118 public static final String MSG_KEY = "multiple.string.literal"; 119 120 /** 121 * Compiled pattern for all system newlines. 122 */ 123 private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R"); 124 125 /** 126 * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL. 127 */ 128 private static final String QUOTE = "\""; 129 130 /** 131 * The found strings and their tokens. 132 */ 133 private final Map<String, List<DetailAST>> stringMap = new HashMap<>(); 134 135 /** 136 * Specify token type names where duplicate strings are ignored even if they 137 * don't match ignoredStringsRegexp. This allows you to exclude syntactical 138 * contexts like annotations or static initializers from the check. 139 */ 140 private final BitSet ignoreOccurrenceContext = new BitSet(); 141 142 /** 143 * Specify the maximum number of occurrences to allow without generating a warning. 144 */ 145 private int allowedDuplicates = 1; 146 147 /** 148 * Specify RegExp for ignored strings (with quotation marks). 149 */ 150 private Pattern ignoreStringsRegexp; 151 152 /** 153 * Construct an instance with default values. 154 */ 155 public MultipleStringLiteralsCheck() { 156 setIgnoreStringsRegexp(Pattern.compile("^\"\"$")); 157 ignoreOccurrenceContext.set(TokenTypes.ANNOTATION); 158 } 159 160 /** 161 * Setter to specify the maximum number of occurrences to allow without generating a warning. 162 * 163 * @param allowedDuplicates The maximum number of duplicates. 164 */ 165 public void setAllowedDuplicates(int allowedDuplicates) { 166 this.allowedDuplicates = allowedDuplicates; 167 } 168 169 /** 170 * Setter to specify RegExp for ignored strings (with quotation marks). 171 * 172 * @param ignoreStringsRegexp 173 * regular expression pattern for ignored strings 174 * @noinspection WeakerAccess 175 */ 176 public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) { 177 if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) { 178 this.ignoreStringsRegexp = null; 179 } 180 else { 181 this.ignoreStringsRegexp = ignoreStringsRegexp; 182 } 183 } 184 185 /** 186 * Setter to specify token type names where duplicate strings are ignored even 187 * if they don't match ignoredStringsRegexp. This allows you to exclude 188 * syntactical contexts like annotations or static initializers from the check. 189 * 190 * @param strRep the string representation of the tokens interested in 191 */ 192 public final void setIgnoreOccurrenceContext(String... strRep) { 193 ignoreOccurrenceContext.clear(); 194 for (final String s : strRep) { 195 final int type = TokenUtil.getTokenId(s); 196 ignoreOccurrenceContext.set(type); 197 } 198 } 199 200 @Override 201 public int[] getDefaultTokens() { 202 return getRequiredTokens(); 203 } 204 205 @Override 206 public int[] getAcceptableTokens() { 207 return getRequiredTokens(); 208 } 209 210 @Override 211 public int[] getRequiredTokens() { 212 return new int[] { 213 TokenTypes.STRING_LITERAL, 214 TokenTypes.TEXT_BLOCK_CONTENT, 215 }; 216 } 217 218 @Override 219 public void visitToken(DetailAST ast) { 220 if (!isInIgnoreOccurrenceContext(ast)) { 221 final String currentString; 222 if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) { 223 final String strippedString = 224 CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText()); 225 // We need to add quotes here to be consistent with STRING_LITERAL text. 226 currentString = QUOTE + strippedString + QUOTE; 227 } 228 else { 229 currentString = ast.getText(); 230 } 231 if (ignoreStringsRegexp == null 232 || !ignoreStringsRegexp.matcher(currentString).find()) { 233 stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast); 234 } 235 } 236 } 237 238 /** 239 * Analyses the path from the AST root to a given AST for occurrences 240 * of the token types in {@link #ignoreOccurrenceContext}. 241 * 242 * @param ast the node from where to start searching towards the root node 243 * @return whether the path from the root node to ast contains one of the 244 * token type in {@link #ignoreOccurrenceContext}. 245 */ 246 private boolean isInIgnoreOccurrenceContext(DetailAST ast) { 247 boolean isInIgnoreOccurrenceContext = false; 248 for (DetailAST token = ast; 249 token.getParent() != null; 250 token = token.getParent()) { 251 final int type = token.getType(); 252 if (ignoreOccurrenceContext.get(type)) { 253 isInIgnoreOccurrenceContext = true; 254 break; 255 } 256 } 257 return isInIgnoreOccurrenceContext; 258 } 259 260 @Override 261 public void beginTree(DetailAST rootAST) { 262 stringMap.clear(); 263 } 264 265 @Override 266 public void finishTree(DetailAST rootAST) { 267 for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) { 268 final List<DetailAST> hits = stringListEntry.getValue(); 269 if (hits.size() > allowedDuplicates) { 270 final DetailAST firstFinding = hits.get(0); 271 final String recurringString = 272 ALL_NEW_LINES.matcher( 273 stringListEntry.getKey()).replaceAll("\\\\n"); 274 log(firstFinding, MSG_KEY, recurringString, hits.size()); 275 } 276 } 277 } 278} 279