001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2016 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.javadoc;
021
022import java.util.ArrayDeque;
023import java.util.Deque;
024import java.util.List;
025import java.util.Locale;
026import java.util.Set;
027import java.util.regex.Pattern;
028
029import org.apache.commons.lang3.ArrayUtils;
030
031import com.google.common.collect.ImmutableSortedSet;
032import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
033import com.puppycrawl.tools.checkstyle.api.DetailAST;
034import com.puppycrawl.tools.checkstyle.api.FileContents;
035import com.puppycrawl.tools.checkstyle.api.Scope;
036import com.puppycrawl.tools.checkstyle.api.TextBlock;
037import com.puppycrawl.tools.checkstyle.api.TokenTypes;
038import com.puppycrawl.tools.checkstyle.utils.CheckUtils;
039import com.puppycrawl.tools.checkstyle.utils.ScopeUtils;
040
041/**
042 * Custom Checkstyle Check to validate Javadoc.
043 *
044 * @author Chris Stillwell
045 * @author Daniel Grenner
046 * @author Travis Schneeberger
047 */
048public class JavadocStyleCheck
049    extends AbstractCheck {
050
    /** Message property key for the missing Javadoc message. */
052    public static final String MSG_JAVADOC_MISSING = "javadoc.missing";
053
    /** Message property key for the empty Javadoc message. */
055    public static final String MSG_EMPTY = "javadoc.empty";
056
    /** Message property key for the message about a first sentence without an ending period. */
058    public static final String MSG_NO_PERIOD = "javadoc.noPeriod";
059
    /** Message property key for the incomplete HTML tag message. */
061    public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";
062
063    /** Message property key for the Unclosed HTML message. */
064    public static final String MSG_UNCLOSED_HTML = "javadoc.unclosedHtml";
065
066    /** Message property key for the Extra HTML message. */
067    public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";
068
069    /** HTML tags that do not require a close tag. */
070    private static final Set<String> SINGLE_TAGS = ImmutableSortedSet.of(
071            "br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th");
072
    /**
     * HTML tags that are allowed in Javadoc comments.
     * The list is taken from http://www.w3schools.com/tags/default.asp;
     * the form and structure tags are not allowed.
     */
077    private static final Set<String> ALLOWED_TAGS = ImmutableSortedSet.of(
078            "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
079            "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
080            "del", "div", "dfn", "dl", "dt", "em", "fieldset", "font", "h1",
081            "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
082            "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
083            "style", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
084            "thead", "tr", "tt", "u", "ul");
085
086    /** The scope to check. */
087    private Scope scope = Scope.PRIVATE;
088
089    /** The visibility scope where Javadoc comments shouldn't be checked. **/
090    private Scope excludeScope;
091
092    /** Format for matching the end of a sentence. */
093    private String endOfSentenceFormat = "([.?!][ \t\n\r\f<])|([.?!]$)";
094
095    /** Regular expression for matching the end of a sentence. */
096    private Pattern endOfSentencePattern;
097
098    /**
099     * Indicates if the first sentence should be checked for proper end of
100     * sentence punctuation.
101     */
102    private boolean checkFirstSentence = true;
103
104    /**
105     * Indicates if the HTML within the comment should be checked.
106     */
107    private boolean checkHtml = true;
108
109    /**
110     * Indicates if empty javadoc statements should be checked.
111     */
112    private boolean checkEmptyJavadoc;
113
114    @Override
115    public int[] getDefaultTokens() {
116        return getAcceptableTokens();
117    }
118
119    @Override
120    public int[] getAcceptableTokens() {
121        return new int[] {
122            TokenTypes.ANNOTATION_DEF,
123            TokenTypes.ANNOTATION_FIELD_DEF,
124            TokenTypes.CLASS_DEF,
125            TokenTypes.CTOR_DEF,
126            TokenTypes.ENUM_CONSTANT_DEF,
127            TokenTypes.ENUM_DEF,
128            TokenTypes.INTERFACE_DEF,
129            TokenTypes.METHOD_DEF,
130            TokenTypes.PACKAGE_DEF,
131            TokenTypes.VARIABLE_DEF,
132        };
133    }
134
135    @Override
136    public int[] getRequiredTokens() {
137        return ArrayUtils.EMPTY_INT_ARRAY;
138    }
139
140    @Override
141    public void visitToken(DetailAST ast) {
142        if (shouldCheck(ast)) {
143            final FileContents contents = getFileContents();
144            // Need to start searching for the comment before the annotations
145            // that may exist. Even if annotations are not defined on the
146            // package, the ANNOTATIONS AST is defined.
147            final TextBlock textBlock =
148                contents.getJavadocBefore(ast.getFirstChild().getLineNo());
149
150            checkComment(ast, textBlock);
151        }
152    }
153
154    /**
155     * Whether we should check this node.
156     * @param ast a given node.
157     * @return whether we should check a given node.
158     */
159    private boolean shouldCheck(final DetailAST ast) {
160        boolean check = false;
161
162        if (ast.getType() == TokenTypes.PACKAGE_DEF) {
163            check = getFileContents().inPackageInfo();
164        }
165        else if (!ScopeUtils.isInCodeBlock(ast)) {
166            final Scope customScope;
167
168            if (ScopeUtils.isInInterfaceOrAnnotationBlock(ast)
169                    || ast.getType() == TokenTypes.ENUM_CONSTANT_DEF) {
170                customScope = Scope.PUBLIC;
171            }
172            else {
173                customScope = ScopeUtils.getScopeFromMods(ast.findFirstToken(TokenTypes.MODIFIERS));
174            }
175            final Scope surroundingScope = ScopeUtils.getSurroundingScope(ast);
176
177            check = customScope.isIn(scope)
178                    && (surroundingScope == null || surroundingScope.isIn(scope))
179                    && (excludeScope == null
180                        || !customScope.isIn(excludeScope)
181                        || surroundingScope != null
182                            && !surroundingScope.isIn(excludeScope));
183        }
184        return check;
185    }
186
187    /**
188     * Performs the various checks against the Javadoc comment.
189     *
190     * @param ast the AST of the element being documented
191     * @param comment the source lines that make up the Javadoc comment.
192     *
193     * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
194     * @see #checkHtmlTags(DetailAST, TextBlock)
195     */
196    private void checkComment(final DetailAST ast, final TextBlock comment) {
197        if (comment == null) {
198            // checking for missing docs in JavadocStyleCheck is not consistent
199            // with the rest of CheckStyle...  Even though, I didn't think it
200            // made sense to make another check just to ensure that the
201            // package-info.java file actually contains package Javadocs.
202            if (getFileContents().inPackageInfo()) {
203                log(ast.getLineNo(), MSG_JAVADOC_MISSING);
204            }
205            return;
206        }
207
208        if (checkFirstSentence) {
209            checkFirstSentenceEnding(ast, comment);
210        }
211
212        if (checkHtml) {
213            checkHtmlTags(ast, comment);
214        }
215
216        if (checkEmptyJavadoc) {
217            checkJavadocIsNotEmpty(comment);
218        }
219    }
220
221    /**
222     * Checks that the first sentence ends with proper punctuation.  This method
223     * uses a regular expression that checks for the presence of a period,
224     * question mark, or exclamation mark followed either by whitespace, an
225     * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
226     * comments for TokenTypes that are valid for {_AT_inheritDoc}.
227     *
228     * @param ast the current node
229     * @param comment the source lines that make up the Javadoc comment.
230     */
231    private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
232        final String commentText = getCommentText(comment.getText());
233
234        if (!commentText.isEmpty()
235            && !getEndOfSentencePattern().matcher(commentText).find()
236            && !(commentText.startsWith("{@inheritDoc}")
237            && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
238            log(comment.getStartLineNo(), MSG_NO_PERIOD);
239        }
240    }
241
242    /**
243     * Checks that the Javadoc is not empty.
244     *
245     * @param comment the source lines that make up the Javadoc comment.
246     */
247    private void checkJavadocIsNotEmpty(TextBlock comment) {
248        final String commentText = getCommentText(comment.getText());
249
250        if (commentText.isEmpty()) {
251            log(comment.getStartLineNo(), MSG_EMPTY);
252        }
253    }
254
255    /**
256     * Returns the comment text from the Javadoc.
257     * @param comments the lines of Javadoc.
258     * @return a comment text String.
259     */
260    private static String getCommentText(String... comments) {
261        final StringBuilder builder = new StringBuilder();
262        for (final String line : comments) {
263            final int textStart = findTextStart(line);
264
265            if (textStart != -1) {
266                if (line.charAt(textStart) == '@') {
267                    //we have found the tag section
268                    break;
269                }
270                builder.append(line.substring(textStart));
271                trimTail(builder);
272                builder.append('\n');
273            }
274        }
275
276        return builder.toString().trim();
277    }
278
279    /**
280     * Finds the index of the first non-whitespace character ignoring the
281     * Javadoc comment start and end strings (&#47** and *&#47) as well as any
282     * leading asterisk.
283     * @param line the Javadoc comment line of text to scan.
284     * @return the int index relative to 0 for the start of text
285     *         or -1 if not found.
286     */
287    private static int findTextStart(String line) {
288        int textStart = -1;
289        for (int i = 0; i < line.length();) {
290            if (!Character.isWhitespace(line.charAt(i))) {
291                if (line.regionMatches(i, "/**", 0, "/**".length())) {
292                    i += 2;
293                }
294                else if (line.regionMatches(i, "*/", 0, 2)) {
295                    i++;
296                }
297                else if (line.charAt(i) != '*') {
298                    textStart = i;
299                    break;
300                }
301            }
302            i++;
303        }
304        return textStart;
305    }
306
307    /**
308     * Trims any trailing whitespace or the end of Javadoc comment string.
309     * @param builder the StringBuilder to trim.
310     */
311    private static void trimTail(StringBuilder builder) {
312        int index = builder.length() - 1;
313        while (true) {
314            if (Character.isWhitespace(builder.charAt(index))) {
315                builder.deleteCharAt(index);
316            }
317            else if (index > 0 && builder.charAt(index) == '/'
318                    && builder.charAt(index - 1) == '*') {
319                builder.deleteCharAt(index);
320                builder.deleteCharAt(index - 1);
321                index--;
322                while (builder.charAt(index - 1) == '*') {
323                    builder.deleteCharAt(index - 1);
324                    index--;
325                }
326            }
327            else {
328                break;
329            }
330            index--;
331        }
332    }
333
334    /**
335     * Checks the comment for HTML tags that do not have a corresponding close
336     * tag or a close tag that has no previous open tag.  This code was
337     * primarily copied from the DocCheck checkHtml method.
338     *
339     * @param ast the node with the Javadoc
340     * @param comment the {@code TextBlock} which represents
341     *                 the Javadoc comment.
342     */
343    private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
344        final int lineNo = comment.getStartLineNo();
345        final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
346        final String[] text = comment.getText();
347
348        final TagParser parser = new TagParser(text, lineNo);
349
350        while (parser.hasNextTag()) {
351            final HtmlTag tag = parser.nextTag();
352
353            if (tag.isIncompleteTag()) {
354                log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
355                    text[tag.getLineNo() - lineNo]);
356                return;
357            }
358            if (tag.isClosedTag()) {
359                //do nothing
360                continue;
361            }
362            if (tag.isCloseTag()) {
363                // We have found a close tag.
364                if (isExtraHtml(tag.getId(), htmlStack)) {
365                    // No corresponding open tag was found on the stack.
366                    log(tag.getLineNo(),
367                        tag.getPosition(),
368                        MSG_EXTRA_HTML,
369                        tag);
370                }
371                else {
372                    // See if there are any unclosed tags that were opened
373                    // after this one.
374                    checkUnclosedTags(htmlStack, tag.getId());
375                }
376            }
377            else {
378                //We only push html tags that are allowed
379                if (isAllowedTag(tag)) {
380                    htmlStack.push(tag);
381                }
382            }
383        }
384
385        // Identify any tags left on the stack.
386        // Skip multiples, like <b>...<b>
387        String lastFound = "";
388        final List<String> typeParameters = CheckUtils.getTypeParameterNames(ast);
389        for (final HtmlTag htmlTag : htmlStack) {
390            if (!isSingleTag(htmlTag)
391                && !htmlTag.getId().equals(lastFound)
392                && !typeParameters.contains(htmlTag.getId())) {
393                log(htmlTag.getLineNo(), htmlTag.getPosition(), MSG_UNCLOSED_HTML, htmlTag);
394                lastFound = htmlTag.getId();
395            }
396        }
397    }
398
399    /**
400     * Checks to see if there are any unclosed tags on the stack.  The token
401     * represents a html tag that has been closed and has a corresponding open
402     * tag on the stack.  Any tags, except single tags, that were opened
403     * (pushed on the stack) after the token are missing a close.
404     *
405     * @param htmlStack the stack of opened HTML tags.
406     * @param token the current HTML tag name that has been closed.
407     */
408    private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
409        final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
410        HtmlTag lastOpenTag = htmlStack.pop();
411        while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
412            // Find unclosed elements. Put them on a stack so the
413            // output order won't be back-to-front.
414            if (isSingleTag(lastOpenTag)) {
415                lastOpenTag = htmlStack.pop();
416            }
417            else {
418                unclosedTags.push(lastOpenTag);
419                lastOpenTag = htmlStack.pop();
420            }
421        }
422
423        // Output the unterminated tags, if any
424        // Skip multiples, like <b>..<b>
425        String lastFound = "";
426        for (final HtmlTag htag : unclosedTags) {
427            lastOpenTag = htag;
428            if (lastOpenTag.getId().equals(lastFound)) {
429                continue;
430            }
431            lastFound = lastOpenTag.getId();
432            log(lastOpenTag.getLineNo(),
433                lastOpenTag.getPosition(),
434                MSG_UNCLOSED_HTML,
435                lastOpenTag);
436        }
437    }
438
439    /**
440     * Determines if the HtmlTag is one which does not require a close tag.
441     *
442     * @param tag the HtmlTag to check.
443     * @return {@code true} if the HtmlTag is a single tag.
444     */
445    private static boolean isSingleTag(HtmlTag tag) {
446        // If its a singleton tag (<p>, <br>, etc.), ignore it
447        // Can't simply not put them on the stack, since singletons
448        // like <dt> and <dd> (unhappily) may either be terminated
449        // or not terminated. Both options are legal.
450        return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
451    }
452
453    /**
454     * Determines if the HtmlTag is one which is allowed in a javadoc.
455     *
456     * @param tag the HtmlTag to check.
457     * @return {@code true} if the HtmlTag is an allowed html tag.
458     */
459    private static boolean isAllowedTag(HtmlTag tag) {
460        return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
461    }
462
463    /**
464     * Determines if the given token is an extra HTML tag. This indicates that
465     * a close tag was found that does not have a corresponding open tag.
466     *
467     * @param token an HTML tag id for which a close was found.
468     * @param htmlStack a Stack of previous open HTML tags.
469     * @return {@code false} if a previous open tag was found
470     *         for the token.
471     */
472    private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
473        boolean isExtra = true;
474        for (final HtmlTag tag : htmlStack) {
475            // Loop, looking for tags that are closed.
476            // The loop is needed in case there are unclosed
477            // tags on the stack. In that case, the stack would
478            // not be empty, but this tag would still be extra.
479            if (token.equalsIgnoreCase(tag.getId())) {
480                isExtra = false;
481                break;
482            }
483        }
484
485        return isExtra;
486    }
487
488    /**
489     * Sets the scope to check.
490     * @param from string to get the scope from
491     */
492    public void setScope(String from) {
493        scope = Scope.getInstance(from);
494    }
495
496    /**
497     * Set the excludeScope.
498     * @param excludeScope a {@code String} value
499     */
500    public void setExcludeScope(String excludeScope) {
501        this.excludeScope = Scope.getInstance(excludeScope);
502    }
503
504    /**
505     * Set the format for matching the end of a sentence.
506     * @param format format for matching the end of a sentence.
507     */
508    public void setEndOfSentenceFormat(String format) {
509        endOfSentenceFormat = format;
510    }
511
512    /**
513     * Returns a regular expression for matching the end of a sentence.
514     *
515     * @return a regular expression for matching the end of a sentence.
516     */
517    private Pattern getEndOfSentencePattern() {
518        if (endOfSentencePattern == null) {
519            endOfSentencePattern = Pattern.compile(endOfSentenceFormat);
520        }
521        return endOfSentencePattern;
522    }
523
524    /**
525     * Sets the flag that determines if the first sentence is checked for
526     * proper end of sentence punctuation.
527     * @param flag {@code true} if the first sentence is to be checked
528     */
529    public void setCheckFirstSentence(boolean flag) {
530        checkFirstSentence = flag;
531    }
532
533    /**
534     * Sets the flag that determines if HTML checking is to be performed.
535     * @param flag {@code true} if HTML checking is to be performed.
536     */
537    public void setCheckHtml(boolean flag) {
538        checkHtml = flag;
539    }
540
541    /**
542     * Sets the flag that determines if empty Javadoc checking should be done.
543     * @param flag {@code true} if empty Javadoc checking should be done.
544     */
545    public void setCheckEmptyJavadoc(boolean flag) {
546        checkEmptyJavadoc = flag;
547    }
548}