From 48efad6a2bd3b8adb34846fbffe81adbbc2f8a07 Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Sun, 11 Oct 2020 17:51:06 +0200 Subject: [PATCH 1/3] Is. #839 - 2nd look - add new message for 'blank' title This is a compromise - 1. Keep insertion of a blank title tag, if none. 2. Add new warn if title tag is blank, in html5 modified: include/tidyenum.h modified: src/language_en.h modified: src/message.c modified: src/parser.c --- include/tidyenum.h | 3 ++- src/language_en.h | 1 + src/message.c | 2 ++ src/parser.c | 11 ++++++++++- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/tidyenum.h b/include/tidyenum.h index 3daee5b..76ae07d 100644 --- a/include/tidyenum.h +++ b/include/tidyenum.h @@ -282,7 +282,8 @@ extern "C" { FN(VENDOR_SPECIFIC_CHARS) \ FN(WHITE_IN_URI) \ FN(XML_DECLARATION_DETECTED) \ - FN(XML_ID_SYNTAX) + FN(XML_ID_SYNTAX) \ + FN(BLANK_TITLE_ELEMENT) /** These are report messages added by Tidy's accessibility module. diff --git a/src/language_en.h b/src/language_en.h index 8d0eb7a..f93324e 100644 --- a/src/language_en.h +++ b/src/language_en.h @@ -2055,6 +2055,7 @@ static languageDefinition language_en = { whichPluralForm_en, { { WHITE_IN_URI, 0, "%s discarding whitespace in URI reference" }, { XML_DECLARATION_DETECTED, 0, "An XML declaration was detected. Did you mean to use input-xml?" 
}, { XML_ID_SYNTAX, 0, "%s ID \"%s\" uses XML ID syntax" }, + { BLANK_TITLE_ELEMENT, 0, "blank 'title' element" }, /*************************************** diff --git a/src/message.c b/src/message.c index ee2e6c6..de0124b 100644 --- a/src/message.c +++ b/src/message.c @@ -372,6 +372,7 @@ static struct _dispatchTable { { WHITE_IN_URI, TidyWarning, formatAttributeReport }, { XML_DECLARATION_DETECTED, TidyWarning, formatStandard }, { XML_ID_SYNTAX, TidyWarning, formatAttributeReport }, + { BLANK_TITLE_ELEMENT, TidyWarning, formatStandard }, { APPLET_MISSING_ALT, TidyAccess, formatAccessReport }, { AREA_MISSING_ALT, TidyAccess, formatAccessReport }, @@ -819,6 +820,7 @@ TidyMessageImpl *formatStandard(TidyDocImpl* doc, Node *element, Node *node, uin case NESTED_QUOTATION: case SUSPECTED_MISSING_QUOTE: case XML_DECLARATION_DETECTED: + case BLANK_TITLE_ELEMENT: return TY_(tidyMessageCreateWithNode)(doc, rpt, code, level ); case ELEMENT_NOT_EMPTY: diff --git a/src/parser.c b/src/parser.c index 8569ed5..cda3200 100644 --- a/src/parser.c +++ b/src/parser.c @@ -4713,7 +4713,8 @@ void TY_(ParseDocument)(TidyDocImpl* doc) TY_(ParseHTML)(doc, html, IgnoreWhitespace); } - if (!TY_(FindTITLE)(doc)) + node = TY_(FindTITLE)(doc); + if (!node) { Node* head = TY_(FindHEAD)(doc); /* #72, avoid MISSING_TITLE_ELEMENT if show-body-only (but allow InsertNodeAtEnd to avoid new warning) */ @@ -4723,6 +4724,14 @@ void TY_(ParseDocument)(TidyDocImpl* doc) } TY_(InsertNodeAtEnd)(head, TY_(InferredTag)(doc, TidyTag_TITLE)); } + else if (!node->content && !showingBodyOnly(doc)) + { + /* Is #839 - warn node is blank in HTML5 */ + if (TY_(IsHTML5Mode)(doc)) + { + TY_(Report)(doc, node, NULL, BLANK_TITLE_ELEMENT); + } + } AttributeChecks(doc, &doc->root); ReplaceObsoleteElements(doc, &doc->root); From cf43bd3448b3a74b5319708d526841531f7af81b Mon Sep 17 00:00:00 2001 From: Jim Derry Date: Wed, 30 Jun 2021 11:08:13 -0400 Subject: [PATCH 2/3] Update existing regression testing results to accommodate 
new behavior. This is justified because we're simply reporting a new condition without changing output. --- regression_testing/cases/legacy-expects/case-1062511.txt | 3 ++- regression_testing/cases/legacy-expects/case-1674502.txt | 3 ++- regression_testing/cases/legacy-expects/case-1773932.txt | 3 ++- regression_testing/cases/legacy-expects/case-427675.txt | 3 ++- regression_testing/cases/legacy-expects/case-427676.txt | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/regression_testing/cases/legacy-expects/case-1062511.txt b/regression_testing/cases/legacy-expects/case-1062511.txt index f2b2729..fbcacd2 100644 --- a/regression_testing/cases/legacy-expects/case-1062511.txt +++ b/regression_testing/cases/legacy-expects/case-1062511.txt @@ -8,10 +8,11 @@ line 7 column 1 - Warning: isn't allowed in elements line 6 column 1 - Info: previously mentioned line 6 column 1 - Warning: missing before
line 3 column 1 - Warning: content occurs after end of body +line 2 column 1 - Warning: blank 'title' element line 3 column 1 - Warning: trimming empty line 6 column 1 - Warning: trimming empty Info: Document content looks like HTML5 -Tidy found 11 warnings and 0 errors! +Tidy found 12 warnings and 0 errors! One or more empty elements were present in the source document but dropped on output. If these elements are necessary or you don't want diff --git a/regression_testing/cases/legacy-expects/case-1674502.txt b/regression_testing/cases/legacy-expects/case-1674502.txt index e2511f8..0eb188e 100644 --- a/regression_testing/cases/legacy-expects/case-1674502.txt +++ b/regression_testing/cases/legacy-expects/case-1674502.txt @@ -1,7 +1,8 @@ line 1 column 1 - Warning: missing declaration line 7 column 1 - Warning: discarding unexpected XML declaration +line 3 column 1 - Warning: blank 'title' element Info: Document content looks like HTML5 -Tidy found 2 warnings and 0 errors! +Tidy found 3 warnings and 0 errors! About HTML Tidy: https://github.com/htacg/tidy-html5 Bug reports and comments: https://github.com/htacg/tidy-html5/issues diff --git a/regression_testing/cases/legacy-expects/case-1773932.txt b/regression_testing/cases/legacy-expects/case-1773932.txt index 008c5e0..626c84f 100644 --- a/regression_testing/cases/legacy-expects/case-1773932.txt +++ b/regression_testing/cases/legacy-expects/case-1773932.txt @@ -3,9 +3,10 @@ line 7 column 1 - Warning: missing before
    line 8 column 9 - Warning: inserting implicit line 9 column 10 - Warning: inserting implicit line 10 column 10 - Warning: inserting implicit +line 3 column 1 - Warning: blank 'title' element line 7 column 1 - Warning: trimming empty Info: Document content looks like HTML5 -Tidy found 6 warnings and 0 errors! +Tidy found 7 warnings and 0 errors! You are recommended to use CSS to specify the font and properties such as its size and color. This will reduce diff --git a/regression_testing/cases/legacy-expects/case-427675.txt b/regression_testing/cases/legacy-expects/case-427675.txt index bd4751f..98b63c2 100644 --- a/regression_testing/cases/legacy-expects/case-427675.txt +++ b/regression_testing/cases/legacy-expects/case-427675.txt @@ -1,11 +1,12 @@ line 1 column 1 - Warning: missing declaration line 8 column 1 - Warning: discarding unexpected line 10 column 1 - Warning: missing +line 3 column 3 - Warning: blank 'title' element line 5 column 1 - Warning: element removed from HTML5 line 6 column 3 - Warning: element removed from HTML5 line 10 column 1 - Warning: element removed from HTML5 Info: Document content looks like HTML5 -Tidy found 6 warnings and 0 errors! +Tidy found 7 warnings and 0 errors! About HTML Tidy: https://github.com/htacg/tidy-html5 Bug reports and comments: https://github.com/htacg/tidy-html5/issues diff --git a/regression_testing/cases/legacy-expects/case-427676.txt b/regression_testing/cases/legacy-expects/case-427676.txt index d391525..2dd11ad 100644 --- a/regression_testing/cases/legacy-expects/case-427676.txt +++ b/regression_testing/cases/legacy-expects/case-427676.txt @@ -5,9 +5,10 @@ line 6 column 14 - Error: <spanstyle> is not recognized! 
line 6 column 14 - Warning: discarding unexpected <spanstyle> line 6 column 44 - Warning: discarding unexpected </span> line 6 column 52 - Info: value for attribute "href" missing quote marks +line 3 column 3 - Warning: blank 'title' element line 6 column 52 - Warning: <a> illegal characters found in URI Info: Document content looks like HTML5 -Tidy found 6 warnings and 1 error! +Tidy found 7 warnings and 1 error! This document has errors that must be fixed before using HTML Tidy to generate a tidied up version. From ad8cae97bd15e7ee90d742eb6cd1f3892f2a559b Mon Sep 17 00:00:00 2001 From: Jim Derry <balthisar@gmail.com> Date: Wed, 30 Jun 2021 11:17:41 -0400 Subject: [PATCH 3/3] Added test case for #839. --- .../cases/github-cases/case-839@1.html | 12 ++++++++++++ .../cases/github-expects/case-839.html | 19 +++++++++++++++++++ .../cases/github-expects/case-839.txt | 14 ++++++++++++++ 3 files changed, 45 insertions(+) create mode 100755 regression_testing/cases/github-cases/case-839@1.html create mode 100644 regression_testing/cases/github-expects/case-839.html create mode 100644 regression_testing/cases/github-expects/case-839.txt diff --git a/regression_testing/cases/github-cases/case-839@1.html b/regression_testing/cases/github-cases/case-839@1.html new file mode 100755 index 0000000..0f962c8 --- /dev/null +++ b/regression_testing/cases/github-cases/case-839@1.html @@ -0,0 +1,12 @@ +<!-- +This test case represents HTML Tidy issue #839, which describes +a discrepancy between tidy's behavior and the W3C Nu HTML +checker behavior. Tidy has historically allowed empty <title> +elements, but this is not allowed by HTML5, and is flagged by +the Nu checker. Because a <title> is required by HTML5, it +would be even worse for HTML Tidy to remove this element, and +so Tidy will now provide a warning if the title element is +empty. It is assumed the HTML author will add a title as well +as addressing other warnings.
+ --> +<!DOCTYPE html><title></title> diff --git a/regression_testing/cases/github-expects/case-839.html b/regression_testing/cases/github-expects/case-839.html new file mode 100644 index 0000000..d32bc5c --- /dev/null +++ b/regression_testing/cases/github-expects/case-839.html @@ -0,0 +1,19 @@ +<!-- +This test case represents HTML Tidy issue #839, which describes +a discrepancy between tidy's behavior and the W3C Nu HTML +checker behavior. Tidy has historically allowed empty <title> +elements, but this is not allowed by HTML5, and is flagged by +the Nu checker. Because a <title> is required by HTML5, it +would be even worse for HTML Tidy to remove this element, and +so Tidy will now provide a warning if the title element is +empty. It is assumed the HTML author will add a title as well +as addressing other warnings. + --> +<!DOCTYPE html> +<html> +<head> + <title></title> +</head> +<body> +</body> +</html> diff --git a/regression_testing/cases/github-expects/case-839.txt b/regression_testing/cases/github-expects/case-839.txt new file mode 100644 index 0000000..7a8a19c --- /dev/null +++ b/regression_testing/cases/github-expects/case-839.txt @@ -0,0 +1,14 @@ +line 12 column 16 - Warning: blank 'title' element +Info: Document content looks like HTML5 +Tidy found 1 warning and 0 errors! + +About HTML Tidy: https://github.com/htacg/tidy-html5 +Bug reports and comments: https://github.com/htacg/tidy-html5/issues +Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ +Latest HTML specification: http://dev.w3.org/html5/spec-author-view/ +Validate your HTML documents: http://validator.w3.org/nu/ +Lobby your company to join the W3C: http://www.w3.org/Consortium + +Do you speak a language other than English, or a different variant of +English? Consider helping us to localize HTML Tidy. For details please see +https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md