Implement TODO:

- tidyDetectedHtmlVersion()
- tidyDetectedXhtml()
- added two new fields to W3C_Doctypes[] in order to simplify this.
- added TY_(HTMLVersionNumberFromCode)() to enable lookup.
- Implement tidyDetectedGenericXml()
- Added a warning message if an XML declaration exists but the document is not
  XHTML.
- Remove dead commented code.
- Updated POs and POT. Headers not affected, but translators should check
  their translations.
- Testing is clean on Mac OS X, Ubuntu 16.04, and Windows 10.
This commit is contained in:
Jim Derry 2017-03-19 15:41:51 -04:00
parent 068e6bf42a
commit a4f752f274
15 changed files with 137 additions and 92 deletions

View file

@ -410,28 +410,24 @@ TIDY_EXPORT ctmbstr TIDY_CALL tidyLibraryVersion(void);
*/ */
TIDY_EXPORT int TIDY_CALL tidyStatus( TidyDoc tdoc ); TIDY_EXPORT int TIDY_CALL tidyStatus( TidyDoc tdoc );
/** This function is supposed to return the detected HTML version; however it /** Gets the version of HTML that was output, as an integer, times 100. For
** isn't currently implemented. ** example, HTML5 will return 500; HTML 4.01 will return 401.
** @note TODO: This function currently does nothing.
** @param tdoc An instance of a TidyDoc to query. ** @param tdoc An instance of a TidyDoc to query.
** @result Returns the value `0`. ** @result Returns the HTML version number (x100).
*/ */
TIDY_EXPORT int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc tdoc ); TIDY_EXPORT int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc tdoc );
/** This function is supposed to return a Bool indicating whether or not the /** Indicates whether the output document is or isn't XHTML.
** input document is XHTML; however it isn't currently implemented.
** @note TODO: This function currently does nothing.
** @param tdoc An instance of a TidyDoc to query. ** @param tdoc An instance of a TidyDoc to query.
** @result Returns the value `no`. ** @result Returns `yes` if the document is an XHTML type.
*/ */
TIDY_EXPORT Bool TIDY_CALL tidyDetectedXhtml( TidyDoc tdoc ); TIDY_EXPORT Bool TIDY_CALL tidyDetectedXhtml( TidyDoc tdoc );
/** This function is supposed to return a Bool indicating whether or not the /** Indicates whether or not the input document was XML. If TidyXmlTags is
** input document is generic XML (i.e., not XHTML or HTML); however it isn't ** true, or there was an XML declaration in the input document, then this
** currently implemented. ** function will return yes.
** @note TODO: This function currently does nothing.
** @param tdoc An instance of a TidyDoc to query. ** @param tdoc An instance of a TidyDoc to query.
** @result Returns the value `no`. ** @result Returns `yes` if the input document was XML.
*/ */
TIDY_EXPORT Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc tdoc ); TIDY_EXPORT Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc tdoc );

View file

@ -881,6 +881,7 @@ typedef enum
FN(COERCE_TO_ENDTAG) \ FN(COERCE_TO_ENDTAG) \
FN(CONTENT_AFTER_BODY) \ FN(CONTENT_AFTER_BODY) \
FN(DISCARDING_UNEXPECTED) \ FN(DISCARDING_UNEXPECTED) \
FN(XML_DECLARATION_DETECTED) \
FN(DOCTYPE_AFTER_TAGS) \ FN(DOCTYPE_AFTER_TAGS) \
FN(DTYPE_NOT_UPPER_CASE) \ FN(DTYPE_NOT_UPPER_CASE) \
FN(DUPLICATE_FRAMESET) \ FN(DUPLICATE_FRAMESET) \

View file

@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=n != 1;\n" "Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: HTML Tidy poconvert.rb\n" "X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n" "Project-Id-Version: \n"
"PO-Revision-Date: 2017-03-15 17:35:16\n" "PO-Revision-Date: 2017-03-19 13:23:38\n"
"Last-Translator: jderry\n" "Last-Translator: jderry\n"
"Language-Team: \n" "Language-Team: \n"
@ -272,7 +272,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE1" msgctxt "TEXT_ACCESS_ADVICE1"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL.\n" "see http://www.w3.org/WAI/GL."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -280,7 +280,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE2" msgctxt "TEXT_ACCESS_ADVICE2"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" "see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -322,7 +322,7 @@ msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
msgctxt "TEXT_USING_BODY" msgctxt "TEXT_USING_BODY"
msgid "You are recommended to use CSS to specify page and link colors\n" msgid "You are recommended to use CSS to specify page and link colors"
msgstr "You are recommended to use CSS to specify page and link colours\n" msgstr "You are recommended to use CSS to specify page and link colours\n"
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -658,6 +658,10 @@ msgctxt "BAD_SUMMARY_HTML5"
msgid "The summary attribute on the %s element is obsolete in HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5"
msgstr "" msgstr ""
msgctxt "XML_DECLARATION_DETECTED"
msgid "An XML declaration was detected. Did you mean to use input-xml?"
msgstr ""
#, c-format #, c-format
msgctxt "TRIM_EMPTY_ELEMENT" msgctxt "TRIM_EMPTY_ELEMENT"
msgid "trimming empty %s" msgid "trimming empty %s"

View file

@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=n != 1;\n" "Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: HTML Tidy poconvert.rb\n" "X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n" "Project-Id-Version: \n"
"PO-Revision-Date: 2017-03-15 17:35:16\n" "PO-Revision-Date: 2017-03-19 13:23:38\n"
"Last-Translator: jderry\n" "Last-Translator: jderry\n"
"Language-Team: \n" "Language-Team: \n"
@ -272,7 +272,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE1" msgctxt "TEXT_ACCESS_ADVICE1"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL.\n" "see http://www.w3.org/WAI/GL."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -280,7 +280,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE2" msgctxt "TEXT_ACCESS_ADVICE2"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" "see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -318,7 +318,7 @@ msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
msgctxt "TEXT_USING_BODY" msgctxt "TEXT_USING_BODY"
msgid "You are recommended to use CSS to specify page and link colors\n" msgid "You are recommended to use CSS to specify page and link colors"
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -654,6 +654,10 @@ msgctxt "BAD_SUMMARY_HTML5"
msgid "The summary attribute on the %s element is obsolete in HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5"
msgstr "" msgstr ""
msgctxt "XML_DECLARATION_DETECTED"
msgid "An XML declaration was detected. Did you mean to use input-xml?"
msgstr ""
#, c-format #, c-format
msgctxt "TRIM_EMPTY_ELEMENT" msgctxt "TRIM_EMPTY_ELEMENT"
msgid "trimming empty %s" msgid "trimming empty %s"

View file

@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=n != 1;\n" "Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: HTML Tidy poconvert.rb\n" "X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n" "Project-Id-Version: \n"
"PO-Revision-Date: 2017-03-15 17:35:16\n" "PO-Revision-Date: 2017-03-19 13:23:38\n"
"Last-Translator: jderry\n" "Last-Translator: jderry\n"
"Language-Team: \n" "Language-Team: \n"
@ -272,7 +272,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE1" msgctxt "TEXT_ACCESS_ADVICE1"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL.\n" "see http://www.w3.org/WAI/GL."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -280,7 +280,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE2" msgctxt "TEXT_ACCESS_ADVICE2"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" "see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -318,7 +318,7 @@ msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
msgctxt "TEXT_USING_BODY" msgctxt "TEXT_USING_BODY"
msgid "You are recommended to use CSS to specify page and link colors\n" msgid "You are recommended to use CSS to specify page and link colors"
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -654,6 +654,10 @@ msgctxt "BAD_SUMMARY_HTML5"
msgid "The summary attribute on the %s element is obsolete in HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5"
msgstr "" msgstr ""
msgctxt "XML_DECLARATION_DETECTED"
msgid "An XML declaration was detected. Did you mean to use input-xml?"
msgstr ""
#, c-format #, c-format
msgctxt "TRIM_EMPTY_ELEMENT" msgctxt "TRIM_EMPTY_ELEMENT"
msgid "trimming empty %s" msgid "trimming empty %s"

View file

@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=(n > 1);\n" "Plural-Forms: nplurals=2; plural=(n > 1);\n"
"X-Generator: HTML Tidy poconvert.rb\n" "X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n" "Project-Id-Version: \n"
"PO-Revision-Date: 2017-03-15 17:35:16\n" "PO-Revision-Date: 2017-03-19 13:23:38\n"
"Last-Translator: jderry\n" "Last-Translator: jderry\n"
"Language-Team: \n" "Language-Team: \n"
@ -377,7 +377,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE1" msgctxt "TEXT_ACCESS_ADVICE1"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL.\n" "see http://www.w3.org/WAI/GL."
msgstr "" msgstr ""
"Pour plus d'informations sur la façon de rendre vos pages\n" "Pour plus d'informations sur la façon de rendre vos pages\n"
"accessibles, voir http://www.w3.org/WAI/GL" "accessibles, voir http://www.w3.org/WAI/GL"
@ -387,7 +387,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE2" msgctxt "TEXT_ACCESS_ADVICE2"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" "see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/."
msgstr "et http://www.html-tidy.org/Accessibility/" msgstr "et http://www.html-tidy.org/Accessibility/"
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -442,7 +442,7 @@ msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
msgctxt "TEXT_USING_BODY" msgctxt "TEXT_USING_BODY"
msgid "You are recommended to use CSS to specify page and link colors\n" msgid "You are recommended to use CSS to specify page and link colors"
msgstr "" msgstr ""
"Il est recommandé d'utiliser les CSS pour spécifier la page et de liaison des " "Il est recommandé d'utiliser les CSS pour spécifier la page et de liaison des "
"couleurs\n" "couleurs\n"
@ -791,6 +791,10 @@ msgctxt "BAD_SUMMARY_HTML5"
msgid "The summary attribute on the %s element is obsolete in HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5"
msgstr "L'attribut summary sur l'élément du %s est obsolète dans HTML5" msgstr "L'attribut summary sur l'élément du %s est obsolète dans HTML5"
msgctxt "XML_DECLARATION_DETECTED"
msgid "An XML declaration was detected. Did you mean to use input-xml?"
msgstr ""
#, c-format #, c-format
msgctxt "TRIM_EMPTY_ELEMENT" msgctxt "TRIM_EMPTY_ELEMENT"
msgid "trimming empty %s" msgid "trimming empty %s"

View file

@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=1; plural=0;\n" "Plural-Forms: nplurals=1; plural=0;\n"
"X-Generator: HTML Tidy poconvert.rb\n" "X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n" "Project-Id-Version: \n"
"PO-Revision-Date: 2017-03-15 17:35:16\n" "PO-Revision-Date: 2017-03-19 13:23:38\n"
"Last-Translator: jderry\n" "Last-Translator: jderry\n"
"Language-Team: \n" "Language-Team: \n"
@ -270,7 +270,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE1" msgctxt "TEXT_ACCESS_ADVICE1"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL.\n" "see http://www.w3.org/WAI/GL."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -278,7 +278,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE2" msgctxt "TEXT_ACCESS_ADVICE2"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" "see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -316,7 +316,7 @@ msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
msgctxt "TEXT_USING_BODY" msgctxt "TEXT_USING_BODY"
msgid "You are recommended to use CSS to specify page and link colors\n" msgid "You are recommended to use CSS to specify page and link colors"
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -648,6 +648,10 @@ msgctxt "BAD_SUMMARY_HTML5"
msgid "The summary attribute on the %s element is obsolete in HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5"
msgstr "" msgstr ""
msgctxt "XML_DECLARATION_DETECTED"
msgid "An XML declaration was detected. Did you mean to use input-xml?"
msgstr ""
#, c-format #, c-format
msgctxt "TRIM_EMPTY_ELEMENT" msgctxt "TRIM_EMPTY_ELEMENT"
msgid "trimming empty %s" msgid "trimming empty %s"

View file

@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=n != 1;\n" "Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: HTML Tidy poconvert.rb\n" "X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n" "Project-Id-Version: \n"
"POT-Creation-Date: 2017-03-15 17:35:16\n" "POT-Creation-Date: 2017-03-19 13:23:38\n"
"Last-Translator: jderry\n" "Last-Translator: jderry\n"
"Language-Team: \n" "Language-Team: \n"
@ -272,7 +272,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE1" msgctxt "TEXT_ACCESS_ADVICE1"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL.\n" "see http://www.w3.org/WAI/GL."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -280,7 +280,7 @@ msgstr ""
msgctxt "TEXT_ACCESS_ADVICE2" msgctxt "TEXT_ACCESS_ADVICE2"
msgid "" msgid ""
"For further advice on how to make your pages accessible\n" "For further advice on how to make your pages accessible\n"
"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" "see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/."
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -318,7 +318,7 @@ msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
msgctxt "TEXT_USING_BODY" msgctxt "TEXT_USING_BODY"
msgid "You are recommended to use CSS to specify page and link colors\n" msgid "You are recommended to use CSS to specify page and link colors"
msgstr "" msgstr ""
#. This console output should be limited to 78 characters per line. #. This console output should be limited to 78 characters per line.
@ -650,6 +650,10 @@ msgctxt "BAD_SUMMARY_HTML5"
msgid "The summary attribute on the %s element is obsolete in HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5"
msgstr "" msgstr ""
msgctxt "XML_DECLARATION_DETECTED"
msgid "An XML declaration was detected. Did you mean to use input-xml?"
msgstr ""
#, c-format #, c-format
msgctxt "TRIM_EMPTY_ELEMENT" msgctxt "TRIM_EMPTY_ELEMENT"
msgid "trimming empty %s" msgid "trimming empty %s"

View file

@ -293,32 +293,31 @@ static languageDefinition language_en = { whichPluralForm_en, {
/*************************************** /***************************************
** Message Severity Level ** Message Severity Level
***************************************/ ***************************************/
{ TidyInfo, 0, "Info: " }, { TidyInfo, 0, "Info: " },
{ TidyWarning, 0, "Warning: " }, { TidyWarning, 0, "Warning: " },
{ TidyConfig, 0, "Config: " }, { TidyConfig, 0, "Config: " },
{ TidyAccess, 0, "Access: " }, { TidyAccess, 0, "Access: " },
{ TidyError, 0, "Error: " }, { TidyError, 0, "Error: " },
{ TidyBadDocument, 0, "Document: " }, { TidyBadDocument, 0, "Document: " },
{ TidyFatal, 0, "Panic: " }, { TidyFatal, 0, "Panic: " },
{ TidyDialogueInfo, 0, "Information: " }, { TidyDialogueInfo, 0, "Information: " },
{ TidyDialogueSummary, 0, "Summary: " }, { TidyDialogueSummary, 0, "Summary: " },
{ TidyDialogueDoc, 0, "Document: " }, { TidyDialogueDoc, 0, "Document: " },
/*************************************** /***************************************
** Warnings and Errors ** Warnings and Errors
***************************************/ ***************************************/
/* ReportEncodingWarning */ /* ReportEncodingWarning */
{ { ENCODING_MISMATCH, 0, "specified input encoding (%s) does not match actual input encoding (%s)" }, /* Warning */
ENCODING_MISMATCH, 0, "specified input encoding (%s) does not match actual input encoding (%s)" }, /* Warning */
/* ReportEncodingError */ /* ReportEncodingError */
{ VENDOR_SPECIFIC_CHARS, 0, "%s invalid character code %s" }, /* Error */ { VENDOR_SPECIFIC_CHARS, 0, "%s invalid character code %s" }, /* Error */
{ INVALID_SGML_CHARS, 0, "%s invalid character code %s" }, /* Error */ { INVALID_SGML_CHARS, 0, "%s invalid character code %s" }, /* Error */
{ INVALID_UTF8, 0, "%s invalid UTF-8 bytes (char. code %s)" }, /* Error */ { INVALID_UTF8, 0, "%s invalid UTF-8 bytes (char. code %s)" }, /* Error */
{ INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" }, /* Error */ { INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" }, /* Error */
{ INVALID_NCR, 0, "%s invalid numeric character reference %s" }, /* Error */ { INVALID_NCR, 0, "%s invalid numeric character reference %s" }, /* Error */
{ BAD_SURROGATE_PAIR, 0, "Have out-of-range surrogate pair U+%04X:U+%04X, replaced with U+FFFD value."}, /* warning */ { BAD_SURROGATE_PAIR, 0, "Have out-of-range surrogate pair U+%04X:U+%04X, replaced with U+FFFD value." }, /* warning */
{ BAD_SURROGATE_TAIL, 0, "Leading (High) surrogate pair U+%04X, with no trailing (Low) entity, replaced with U+FFFD." }, /* warning */ { BAD_SURROGATE_TAIL, 0, "Leading (High) surrogate pair U+%04X, with no trailing (Low) entity, replaced with U+FFFD." }, /* warning */
{ BAD_SURROGATE_LEAD, 0, "Trailing (Low) surrogate pair U+%04X, with no leading (High) entity, replaced with U+FFFD." }, /* warning */ { BAD_SURROGATE_LEAD, 0, "Trailing (Low) surrogate pair U+%04X, with no leading (High) entity, replaced with U+FFFD." }, /* warning */
@ -381,6 +380,7 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ COERCE_TO_ENDTAG_WARN, 0, "<%s> is probably intended as </%s>" }, /* Warning */ { COERCE_TO_ENDTAG_WARN, 0, "<%s> is probably intended as </%s>" }, /* Warning */
{ REMOVED_HTML5, 0, "%s element removed from HTML5" }, /* Warning */ { REMOVED_HTML5, 0, "%s element removed from HTML5" }, /* Warning */
{ BAD_SUMMARY_HTML5, 0, "The summary attribute on the %s element is obsolete in HTML5" }, /* Warning */ { BAD_SUMMARY_HTML5, 0, "The summary attribute on the %s element is obsolete in HTML5" }, /* Warning */
{ XML_DECLARATION_DETECTED, 0, "An XML declaration was detected. Did you mean to use input-xml?" }, /* Warning */
/* ReportNotice */ /* ReportNotice */
{ TRIM_EMPTY_ELEMENT, 0, "trimming empty %s" }, /* Notice */ { TRIM_EMPTY_ELEMENT, 0, "trimming empty %s" }, /* Notice */

View file

@ -210,39 +210,41 @@ static struct _doctypes
{ {
uint score; uint score;
uint vers; uint vers;
uint vers_out;
Bool xhtml;
ctmbstr name; ctmbstr name;
ctmbstr fpi; ctmbstr fpi;
ctmbstr si; ctmbstr si;
} const W3C_Doctypes[] = } const W3C_Doctypes[] =
{ {
{ 2, HT20, "HTML 2.0", "-//IETF//DTD HTML 2.0//EN", NULL, }, { 2, HT20, 200, no, "HTML 2.0", "-//IETF//DTD HTML 2.0//EN", NULL, },
{ 2, HT20, "HTML 2.0", "-//IETF//DTD HTML//EN", NULL, }, { 2, HT20, 200, no, "HTML 2.0", "-//IETF//DTD HTML//EN", NULL, },
{ 2, HT20, "HTML 2.0", "-//W3C//DTD HTML 2.0//EN", NULL, }, { 2, HT20, 200, no, "HTML 2.0", "-//W3C//DTD HTML 2.0//EN", NULL, },
{ 1, HT32, "HTML 3.2", "-//W3C//DTD HTML 3.2//EN", NULL, }, { 1, HT32, 320, no, "HTML 3.2", "-//W3C//DTD HTML 3.2//EN", NULL, },
{ 1, HT32, "HTML 3.2", "-//W3C//DTD HTML 3.2 Final//EN", NULL, }, { 1, HT32, 320, no, "HTML 3.2", "-//W3C//DTD HTML 3.2 Final//EN", NULL, },
{ 1, HT32, "HTML 3.2", "-//W3C//DTD HTML 3.2 Draft//EN", NULL, }, { 1, HT32, 320, no, "HTML 3.2", "-//W3C//DTD HTML 3.2 Draft//EN", NULL, },
{ 6, H40S, "HTML 4.0 Strict", "-//W3C//DTD HTML 4.0//EN", "http://www.w3.org/TR/REC-html40/strict.dtd" }, { 6, H40S, 400, no, "HTML 4.0 Strict", "-//W3C//DTD HTML 4.0//EN", "http://www.w3.org/TR/REC-html40/strict.dtd" },
{ 8, H40T, "HTML 4.0 Transitional", "-//W3C//DTD HTML 4.0 Transitional//EN", "http://www.w3.org/TR/REC-html40/loose.dtd" }, { 8, H40T, 400, no, "HTML 4.0 Transitional", "-//W3C//DTD HTML 4.0 Transitional//EN", "http://www.w3.org/TR/REC-html40/loose.dtd" },
{ 7, H40F, "HTML 4.0 Frameset", "-//W3C//DTD HTML 4.0 Frameset//EN", "http://www.w3.org/TR/REC-html40/frameset.dtd" }, { 7, H40F, 400, no, "HTML 4.0 Frameset", "-//W3C//DTD HTML 4.0 Frameset//EN", "http://www.w3.org/TR/REC-html40/frameset.dtd" },
{ 3, H41S, "HTML 4.01 Strict", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd" }, { 3, H41S, 401, no, "HTML 4.01 Strict", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd" },
{ 5, H41T, "HTML 4.01 Transitional", "-//W3C//DTD HTML 4.01 Transitional//EN", "http://www.w3.org/TR/html4/loose.dtd" }, { 5, H41T, 401, no, "HTML 4.01 Transitional", "-//W3C//DTD HTML 4.01 Transitional//EN", "http://www.w3.org/TR/html4/loose.dtd" },
{ 4, H41F, "HTML 4.01 Frameset", "-//W3C//DTD HTML 4.01 Frameset//EN", "http://www.w3.org/TR/html4/frameset.dtd" }, { 4, H41F, 401, no, "HTML 4.01 Frameset", "-//W3C//DTD HTML 4.01 Frameset//EN", "http://www.w3.org/TR/html4/frameset.dtd" },
{ 9, X10S, "XHTML 1.0 Strict", "-//W3C//DTD XHTML 1.0 Strict//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" }, { 9, X10S, 100, yes, "XHTML 1.0 Strict", "-//W3C//DTD XHTML 1.0 Strict//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" },
{ 11, X10T, "XHTML 1.0 Transitional", "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" }, { 11, X10T, 100, yes, "XHTML 1.0 Transitional", "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" },
{ 10, X10F, "XHTML 1.0 Frameset", "-//W3C//DTD XHTML 1.0 Frameset//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd" }, { 10, X10F, 100, yes, "XHTML 1.0 Frameset", "-//W3C//DTD XHTML 1.0 Frameset//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd" },
{ 12, XH11, "XHTML 1.1", "-//W3C//DTD XHTML 1.1//EN", "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" }, { 12, XH11, 110, yes, "XHTML 1.1", "-//W3C//DTD XHTML 1.1//EN", "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" },
{ 13, XB10, "XHTML Basic 1.0", "-//W3C//DTD XHTML Basic 1.0//EN", "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd" }, { 13, XB10, 100, yes, "XHTML Basic 1.0", "-//W3C//DTD XHTML Basic 1.0//EN", "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd" },
{ 20, HT50, "HTML5", NULL, NULL }, { 20, HT50, 500, no, "HTML5", NULL, NULL },
{ 21, XH50, "XHTML5", NULL, NULL }, { 21, XH50, 500, yes, "XHTML5", NULL, NULL },
/* reminder to add XHTML Print 1.0 support, see http://www.w3.org/TR/xhtml-print */ /* reminder to add XHTML Print 1.0 support, see http://www.w3.org/TR/xhtml-print */
#if 0 #if 0
{ 14, XP10, "XHTML Print 1.0", "-//W3C//DTD XHTML-Print 1.0//EN", "http://www.w3.org/MarkUp/DTD/xhtml-print10.dtd" }, { 14, XP10, 100, yes, "XHTML Print 1.0", "-//W3C//DTD XHTML-Print 1.0//EN", "http://www.w3.org/MarkUp/DTD/xhtml-print10.dtd" },
{ 14, XP10, "XHTML Print 1.0", "-//PWG//DTD XHTML-Print 1.0//EN", "http://www.xhtml-print.org/xhtml-print/xhtml-print10.dtd" }, { 14, XP10, 100, yes, "XHTML Print 1.0", "-//PWG//DTD XHTML-Print 1.0//EN", "http://www.xhtml-print.org/xhtml-print/xhtml-print10.dtd" },
#endif #endif
/* final entry */ /* final entry */
{ 0, 0, NULL, NULL, NULL } { 0, 0, 0, no, NULL, NULL, NULL }
}; };
int TY_(HTMLVersion)(TidyDocImpl* doc) int TY_(HTMLVersion)(TidyDocImpl* doc)
@ -1930,16 +1932,20 @@ uint TY_(ApparentVersion)( TidyDocImpl* doc )
ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool ARG_UNUSED(isXhtml) ) ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool ARG_UNUSED(isXhtml) )
{ {
ctmbstr name = GetNameFromVers(vers); ctmbstr name = GetNameFromVers(vers);
/* this test has moved to ReportMarkupVersion() in localize.c, for localization reasons */
/*
if (!name)
name = "HTML Proprietary";
*/
return name; return name;
} }
/* Looks up `vers` in the W3C_Doctypes table and returns the `vers_out`
** value of the first entry whose `vers` field is exactly equal to it.
** The table is walked until the terminating entry (the one whose `name`
** is NULL). If no entry matches, VERS_UNKNOWN is returned.
*/
uint TY_(HTMLVersionNumberFromCode)( uint vers )
{
    uint result = VERS_UNKNOWN;
    uint idx = 0;

    while ( W3C_Doctypes[idx].name != NULL )
    {
        if ( W3C_Doctypes[idx].vers == vers )
        {
            /* First match wins; stop scanning. */
            result = W3C_Doctypes[idx].vers_out;
            break;
        }
        ++idx;
    }

    return result;
}
Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc ) Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc )
{ {
Bool isXhtml = doc->lexer->isvoyager; Bool isXhtml = doc->lexer->isvoyager;

View file

@ -498,6 +498,8 @@ uint TY_(ApparentVersion)( TidyDocImpl* doc );
ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool isXhtml ); ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool isXhtml );
uint TY_(HTMLVersionNumberFromCode)( uint vers );
Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc ); Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc );
Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc ); Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc );

View file

@ -267,6 +267,9 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
case COERCE_TO_ENDTAG_WARN: case COERCE_TO_ENDTAG_WARN:
message = TY_(tidyMessageCreateWithNode)(doc, rpt, code, TidyWarning, node->element, node->element ); message = TY_(tidyMessageCreateWithNode)(doc, rpt, code, TidyWarning, node->element, node->element );
break; break;
case XML_DECLARATION_DETECTED:
message = TY_(tidyMessageCreateWithNode)(doc, node, code, TidyWarning );
break;
} }
messageOut( message ); messageOut( message );

View file

@ -4752,13 +4752,15 @@ void TY_(ParseDocument)(TidyDocImpl* doc)
{ {
if (node->type == XmlDecl) if (node->type == XmlDecl)
{ {
doc->xmlDetected = yes;
if (TY_(FindXmlDecl)(doc) && doc->root.content) if (TY_(FindXmlDecl)(doc) && doc->root.content)
{ {
TY_(ReportError)(doc, &doc->root, node, DISCARDING_UNEXPECTED); TY_(ReportError)(doc, &doc->root, node, DISCARDING_UNEXPECTED);
TY_(FreeNode)(doc, node); TY_(FreeNode)(doc, node);
continue; continue;
} }
if (node->line != 1 || (node->line == 1 && node->column != 1)) if (node->line > 1 || node->column != 1)
{ {
TY_(ReportError)(doc, &doc->root, node, SPACE_PRECEDING_XMLDECL); TY_(ReportError)(doc, &doc->root, node, SPACE_PRECEDING_XMLDECL);
} }
@ -5015,6 +5017,8 @@ void TY_(ParseXMLDocument)(TidyDocImpl* doc)
TY_(SetOptionBool)( doc, TidyXmlTags, yes ); TY_(SetOptionBool)( doc, TidyXmlTags, yes );
doc->xmlDetected = yes;
while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
{ {
/* discard unexpected end tags */ /* discard unexpected end tags */

View file

@ -76,7 +76,8 @@ struct _TidyDocImpl
uint badChars; /* for bad char encodings */ uint badChars; /* for bad char encodings */
uint badForm; /* bit field, for badly placed form tags, or other format errors */ uint badForm; /* bit field, for badly placed form tags, or other format errors */
Bool HTML5Mode; /* current mode is html5 */ Bool HTML5Mode; /* current mode is html5 */
Bool xmlDetected; /* true if XML was used/detected */
/* Memory allocator */ /* Memory allocator */
TidyAllocator* allocator; TidyAllocator* allocator;

View file

@ -955,18 +955,19 @@ int TIDY_CALL tidyStatus( TidyDoc tdoc )
} }
int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) ) int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) )
{ {
/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ TidyDocImpl* impl = tidyDocToImpl( tdoc );
return 0; return TY_(HTMLVersionNumberFromCode)( impl->lexer->versionEmitted );
} }
Bool TIDY_CALL tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) ) Bool TIDY_CALL tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) )
{ {
/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ TidyDocImpl* impl = tidyDocToImpl( tdoc );
return no; return impl->lexer->isvoyager;
} }
Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) ) Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) )
{ {
/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ TidyDocImpl* impl = tidyDocToImpl( tdoc );
return no; return impl->xmlDetected;
} }
uint TIDY_CALL tidyErrorCount( TidyDoc tdoc ) uint TIDY_CALL tidyErrorCount( TidyDoc tdoc )
@ -1400,6 +1401,7 @@ int TY_(DocParseStream)( TidyDocImpl* doc, StreamIn* in )
doc->root.line = doc->lexer->lines; doc->root.line = doc->lexer->lines;
doc->root.column = doc->lexer->columns; doc->root.column = doc->lexer->columns;
doc->inputHadBOM = no; doc->inputHadBOM = no;
doc->xmlDetected = no;
bomEnc = TY_(ReadBOMEncoding)(in); bomEnc = TY_(ReadBOMEncoding)(in);
@ -2095,6 +2097,12 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
if (doc->lexer->versionEmitted & VERS_HTML5) if (doc->lexer->versionEmitted & VERS_HTML5)
TY_(CheckHTML5)( doc, &doc->root ); TY_(CheckHTML5)( doc, &doc->root );
TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root ); TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root );
if ( !doc->lexer->isvoyager && doc->xmlDetected )
{
TY_(ReportWarning)(doc, NULL, TY_(FindXmlDecl)(doc), XML_DECLARATION_DETECTED );
}
} }
#if !defined(NDEBUG) && defined(_MSC_VER) #if !defined(NDEBUG) && defined(_MSC_VER)