001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2020 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.coding;
021
022import java.util.ArrayList;
023import java.util.BitSet;
024import java.util.HashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.regex.Pattern;
028
029import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
030import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
031import com.puppycrawl.tools.checkstyle.api.DetailAST;
032import com.puppycrawl.tools.checkstyle.api.TokenTypes;
033import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
034import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
035
036/**
037 * <p>
038 * Checks for multiple occurrences of the same string literal within a single file.
039 * </p>
040 * <p>
041 * Rationale: Code duplication makes maintenance more difficult, so it can be better
042 * to replace the multiple occurrences with a constant.
043 * </p>
044 * <ul>
045 * <li>
046 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
047 * to allow without generating a warning.
048 * Type is {@code int}.
049 * Default value is {@code 1}.
050 * </li>
051 * <li>
052 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
053 * Type is {@code java.util.regex.Pattern}.
054 * Default value is {@code "^""$"}.
055 * </li>
056 * <li>
057 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
058 * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to
059 * exclude syntactical contexts like annotations or static initializers from the check.
060 * Type is {@code java.lang.String[]}.
061 * Validation type is {@code tokenTypesSet}.
062 * Default value is {@code ANNOTATION}.
063 * </li>
064 * </ul>
065 * <p>
066 * To configure the check:
067 * </p>
068 * <pre>
069 * &lt;module name=&quot;MultipleStringLiterals&quot;/&gt;
070 * </pre>
071 * <p>
072 * To configure the check so that it allows two occurrences of each string:
073 * </p>
074 * <pre>
075 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
076 *   &lt;property name=&quot;allowedDuplicates&quot; value=&quot;2&quot;/&gt;
077 * &lt;/module&gt;
078 * </pre>
079 * <p>
080 * To configure the check so that it ignores ", " and empty strings:
081 * </p>
082 * <pre>
083 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
084 *   &lt;property name=&quot;ignoreStringsRegexp&quot;
085 *     value='^((&quot;&quot;)|(&quot;, &quot;))$'/&gt;
086 * &lt;/module&gt;
087 * </pre>
088 * <p>
089 * To configure the check so that it flags duplicate strings in all syntactical contexts,
090 * even in annotations like {@code @SuppressWarnings("unchecked")}:
091 * </p>
092 * <pre>
093 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
094 *   &lt;property name=&quot;ignoreOccurrenceContext&quot; value=&quot;&quot;/&gt;
095 * &lt;/module&gt;
096 * </pre>
097 * <p>
098 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
099 * </p>
100 * <p>
101 * Violation Message Keys:
102 * </p>
103 * <ul>
104 * <li>
105 * {@code multiple.string.literal}
106 * </li>
107 * </ul>
108 *
109 * @since 3.5
110 */
111@FileStatefulCheck
112public class MultipleStringLiteralsCheck extends AbstractCheck {
113
114    /**
115     * A key is pointing to the warning message text in "messages.properties"
116     * file.
117     */
118    public static final String MSG_KEY = "multiple.string.literal";
119
120    /**
121     * Compiled pattern for all system newlines.
122     */
123    private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");
124
125    /**
126     * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
127     */
128    private static final String QUOTE = "\"";
129
130    /**
131     * The found strings and their tokens.
132     */
133    private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
134
135    /**
136     * Specify token type names where duplicate strings are ignored even if they
137     * don't match ignoredStringsRegexp. This allows you to exclude syntactical
138     * contexts like annotations or static initializers from the check.
139     */
140    private final BitSet ignoreOccurrenceContext = new BitSet();
141
142    /**
143     * Specify the maximum number of occurrences to allow without generating a warning.
144     */
145    private int allowedDuplicates = 1;
146
147    /**
148     * Specify RegExp for ignored strings (with quotation marks).
149     */
150    private Pattern ignoreStringsRegexp;
151
152    /**
153     * Construct an instance with default values.
154     */
155    public MultipleStringLiteralsCheck() {
156        setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
157        ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
158    }
159
160    /**
161     * Setter to specify the maximum number of occurrences to allow without generating a warning.
162     *
163     * @param allowedDuplicates The maximum number of duplicates.
164     */
165    public void setAllowedDuplicates(int allowedDuplicates) {
166        this.allowedDuplicates = allowedDuplicates;
167    }
168
169    /**
170     * Setter to specify RegExp for ignored strings (with quotation marks).
171     *
172     * @param ignoreStringsRegexp
173     *        regular expression pattern for ignored strings
174     * @noinspection WeakerAccess
175     */
176    public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
177        if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
178            this.ignoreStringsRegexp = null;
179        }
180        else {
181            this.ignoreStringsRegexp = ignoreStringsRegexp;
182        }
183    }
184
185    /**
186     * Setter to specify token type names where duplicate strings are ignored even
187     * if they don't match ignoredStringsRegexp. This allows you to exclude
188     * syntactical contexts like annotations or static initializers from the check.
189     *
190     * @param strRep the string representation of the tokens interested in
191     */
192    public final void setIgnoreOccurrenceContext(String... strRep) {
193        ignoreOccurrenceContext.clear();
194        for (final String s : strRep) {
195            final int type = TokenUtil.getTokenId(s);
196            ignoreOccurrenceContext.set(type);
197        }
198    }
199
200    @Override
201    public int[] getDefaultTokens() {
202        return getRequiredTokens();
203    }
204
205    @Override
206    public int[] getAcceptableTokens() {
207        return getRequiredTokens();
208    }
209
210    @Override
211    public int[] getRequiredTokens() {
212        return new int[] {
213            TokenTypes.STRING_LITERAL,
214            TokenTypes.TEXT_BLOCK_CONTENT,
215        };
216    }
217
218    @Override
219    public void visitToken(DetailAST ast) {
220        if (!isInIgnoreOccurrenceContext(ast)) {
221            final String currentString;
222            if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
223                final String strippedString =
224                    CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
225                // We need to add quotes here to be consistent with STRING_LITERAL text.
226                currentString = QUOTE + strippedString + QUOTE;
227            }
228            else {
229                currentString = ast.getText();
230            }
231            if (ignoreStringsRegexp == null
232                    || !ignoreStringsRegexp.matcher(currentString).find()) {
233                stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
234            }
235        }
236    }
237
238    /**
239     * Analyses the path from the AST root to a given AST for occurrences
240     * of the token types in {@link #ignoreOccurrenceContext}.
241     *
242     * @param ast the node from where to start searching towards the root node
243     * @return whether the path from the root node to ast contains one of the
244     *     token type in {@link #ignoreOccurrenceContext}.
245     */
246    private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
247        boolean isInIgnoreOccurrenceContext = false;
248        for (DetailAST token = ast;
249             token.getParent() != null;
250             token = token.getParent()) {
251            final int type = token.getType();
252            if (ignoreOccurrenceContext.get(type)) {
253                isInIgnoreOccurrenceContext = true;
254                break;
255            }
256        }
257        return isInIgnoreOccurrenceContext;
258    }
259
260    @Override
261    public void beginTree(DetailAST rootAST) {
262        stringMap.clear();
263    }
264
265    @Override
266    public void finishTree(DetailAST rootAST) {
267        for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
268            final List<DetailAST> hits = stringListEntry.getValue();
269            if (hits.size() > allowedDuplicates) {
270                final DetailAST firstFinding = hits.get(0);
271                final String recurringString =
272                    ALL_NEW_LINES.matcher(
273                        stringListEntry.getKey()).replaceAll("\\\\n");
274                log(firstFinding, MSG_KEY, recurringString, hits.size());
275            }
276        }
277    }
278}
279