From a4f752f274b42712810f6a963c8ab6e0ea3caaaf Mon Sep 17 00:00:00 2001 From: Jim Derry Date: Sun, 19 Mar 2017 15:41:51 -0400 Subject: [PATCH] Implement TODO: - tidyDetectedHtmlVersion() - tidyDetectedXhtml() - added two new fields to W3C_Doctypes[] in order to simplify this. - added TY_(HTMLVersionNumberFromCode)() to enable lookup. - Implement tidyDetectedGenericXml() - Added a warning message if an XML declaration exists but the document is not XHTML. - Remove dead commented code. - Updated POs and POT. Headers not affected, but translators should check their translations. - Testing is clean on Mac OS X, Ubuntu 16.04, and Windows 10. --- include/tidy.h | 22 ++++----- include/tidyenum.h | 1 + localize/translations/language_en_gb.po | 12 +++-- localize/translations/language_es.po | 12 +++-- localize/translations/language_es_mx.po | 12 +++-- localize/translations/language_fr.po | 12 +++-- localize/translations/language_zh_cn.po | 12 +++-- localize/translations/tidy.pot | 12 +++-- src/language_en.h | 36 +++++++------- src/lexer.c | 64 ++++++++++++++----------- src/lexer.h | 2 + src/message.c | 3 ++ src/parser.c | 6 ++- src/tidy-int.h | 3 +- src/tidylib.c | 20 +++++--- 15 files changed, 137 insertions(+), 92 deletions(-) diff --git a/include/tidy.h b/include/tidy.h index 967036f..30284c6 100644 --- a/include/tidy.h +++ b/include/tidy.h @@ -410,28 +410,24 @@ TIDY_EXPORT ctmbstr TIDY_CALL tidyLibraryVersion(void); */ TIDY_EXPORT int TIDY_CALL tidyStatus( TidyDoc tdoc ); -/** This function is supposed to return the detected HTML version; however it - ** isn't currently implemented. - ** @note TODO: This function currently does nothing. +/** Gets the version of HTML that was output, as an integer, times 100. For + ** example, HTML5 will return 500; HTML 4.01 will return 401. ** @param tdoc An instance of a TidyDoc to query. - ** @result Returns the value `0`. + ** @result Returns the HTML version number (x100). 
*/ TIDY_EXPORT int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc tdoc ); -/** This function is supposed to return a Bool indicating whether or not the - ** input document is XHTML; however it isn't currently implemented. - ** @note TODO: This function currently does nothing. +/** Indicates whether the output document is or isn't XHTML. ** @param tdoc An instance of a TidyDoc to query. - ** @result Returns the value `no`. + ** @result Returns `yes` if the document is an XHTML type. */ TIDY_EXPORT Bool TIDY_CALL tidyDetectedXhtml( TidyDoc tdoc ); -/** This function is supposed to return a Bool indicating whether or not the - ** input document is generic XML (i.e., not XHTL or HTML); however it isn't - ** currently implemented. - ** @note TODO: This function currently does nothing. +/** Indicates whether or not the input document was XML. If TidyXmlTags is + ** true, or there was an XML declaration in the input document, then this + ** function will return yes. ** @param tdoc An instance of a TidyDoc to query. - ** @result Returns the value `no`. + ** @result Returns `yes` if the input document was XML. 
*/ TIDY_EXPORT Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc tdoc ); diff --git a/include/tidyenum.h b/include/tidyenum.h index 4bbc64c..7304c2e 100644 --- a/include/tidyenum.h +++ b/include/tidyenum.h @@ -881,6 +881,7 @@ typedef enum FN(COERCE_TO_ENDTAG) \ FN(CONTENT_AFTER_BODY) \ FN(DISCARDING_UNEXPECTED) \ + FN(XML_DECLARATION_DETECTED) \ FN(DOCTYPE_AFTER_TAGS) \ FN(DTYPE_NOT_UPPER_CASE) \ FN(DUPLICATE_FRAMESET) \ diff --git a/localize/translations/language_en_gb.po b/localize/translations/language_en_gb.po index bfcbe2e..ee38a86 100644 --- a/localize/translations/language_en_gb.po +++ b/localize/translations/language_en_gb.po @@ -5,7 +5,7 @@ msgstr "" "Plural-Forms: nplurals=2; plural=n != 1;\n" "X-Generator: HTML Tidy poconvert.rb\n" "Project-Id-Version: \n" -"PO-Revision-Date: 2017-03-15 17:35:16\n" +"PO-Revision-Date: 2017-03-19 13:23:38\n" "Last-Translator: jderry\n" "Language-Team: \n" @@ -272,7 +272,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE1" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL.\n" +"see http://www.w3.org/WAI/GL." msgstr "" #. This console output should be limited to 78 characters per line. @@ -280,7 +280,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE2" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" +"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/." msgstr "" #. This console output should be limited to 78 characters per line. @@ -322,7 +322,7 @@ msgstr "" #. This console output should be limited to 78 characters per line. msgctxt "TEXT_USING_BODY" -msgid "You are recommended to use CSS to specify page and link colors\n" +msgid "You are recommended to use CSS to specify page and link colors" msgstr "You are recommended to use CSS to specify page and link colours\n" #. This console output should be limited to 78 characters per line. 
@@ -658,6 +658,10 @@ msgctxt "BAD_SUMMARY_HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5" msgstr "" +msgctxt "XML_DECLARATION_DETECTED" +msgid "An XML declaration was detected. Did you mean to use input-xml?" +msgstr "" + #, c-format msgctxt "TRIM_EMPTY_ELEMENT" msgid "trimming empty %s" diff --git a/localize/translations/language_es.po b/localize/translations/language_es.po index 7f403f0..d512d79 100644 --- a/localize/translations/language_es.po +++ b/localize/translations/language_es.po @@ -5,7 +5,7 @@ msgstr "" "Plural-Forms: nplurals=2; plural=n != 1;\n" "X-Generator: HTML Tidy poconvert.rb\n" "Project-Id-Version: \n" -"PO-Revision-Date: 2017-03-15 17:35:16\n" +"PO-Revision-Date: 2017-03-19 13:23:38\n" "Last-Translator: jderry\n" "Language-Team: \n" @@ -272,7 +272,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE1" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL.\n" +"see http://www.w3.org/WAI/GL." msgstr "" #. This console output should be limited to 78 characters per line. @@ -280,7 +280,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE2" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" +"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/." msgstr "" #. This console output should be limited to 78 characters per line. @@ -318,7 +318,7 @@ msgstr "" #. This console output should be limited to 78 characters per line. msgctxt "TEXT_USING_BODY" -msgid "You are recommended to use CSS to specify page and link colors\n" +msgid "You are recommended to use CSS to specify page and link colors" msgstr "" #. This console output should be limited to 78 characters per line. @@ -654,6 +654,10 @@ msgctxt "BAD_SUMMARY_HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5" msgstr "" +msgctxt "XML_DECLARATION_DETECTED" +msgid "An XML declaration was detected. 
Did you mean to use input-xml?" +msgstr "" + #, c-format msgctxt "TRIM_EMPTY_ELEMENT" msgid "trimming empty %s" diff --git a/localize/translations/language_es_mx.po b/localize/translations/language_es_mx.po index 558496e..1373d0f 100644 --- a/localize/translations/language_es_mx.po +++ b/localize/translations/language_es_mx.po @@ -5,7 +5,7 @@ msgstr "" "Plural-Forms: nplurals=2; plural=n != 1;\n" "X-Generator: HTML Tidy poconvert.rb\n" "Project-Id-Version: \n" -"PO-Revision-Date: 2017-03-15 17:35:16\n" +"PO-Revision-Date: 2017-03-19 13:23:38\n" "Last-Translator: jderry\n" "Language-Team: \n" @@ -272,7 +272,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE1" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL.\n" +"see http://www.w3.org/WAI/GL." msgstr "" #. This console output should be limited to 78 characters per line. @@ -280,7 +280,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE2" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" +"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/." msgstr "" #. This console output should be limited to 78 characters per line. @@ -318,7 +318,7 @@ msgstr "" #. This console output should be limited to 78 characters per line. msgctxt "TEXT_USING_BODY" -msgid "You are recommended to use CSS to specify page and link colors\n" +msgid "You are recommended to use CSS to specify page and link colors" msgstr "" #. This console output should be limited to 78 characters per line. @@ -654,6 +654,10 @@ msgctxt "BAD_SUMMARY_HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5" msgstr "" +msgctxt "XML_DECLARATION_DETECTED" +msgid "An XML declaration was detected. Did you mean to use input-xml?" 
+msgstr "" + #, c-format msgctxt "TRIM_EMPTY_ELEMENT" msgid "trimming empty %s" diff --git a/localize/translations/language_fr.po b/localize/translations/language_fr.po index 0c5f4d0..d7079b7 100644 --- a/localize/translations/language_fr.po +++ b/localize/translations/language_fr.po @@ -5,7 +5,7 @@ msgstr "" "Plural-Forms: nplurals=2; plural=(n > 1);\n" "X-Generator: HTML Tidy poconvert.rb\n" "Project-Id-Version: \n" -"PO-Revision-Date: 2017-03-15 17:35:16\n" +"PO-Revision-Date: 2017-03-19 13:23:38\n" "Last-Translator: jderry\n" "Language-Team: \n" @@ -377,7 +377,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE1" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL.\n" +"see http://www.w3.org/WAI/GL." msgstr "" "Pour plus d'informations sur la façon de rendre vos pages\n" "accessibles, voir http://www.w3.org/WAI/GL" @@ -387,7 +387,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE2" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" +"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/." msgstr "et http://www.html-tidy.org/Accessibility/" #. This console output should be limited to 78 characters per line. @@ -442,7 +442,7 @@ msgstr "" #. This console output should be limited to 78 characters per line. msgctxt "TEXT_USING_BODY" -msgid "You are recommended to use CSS to specify page and link colors\n" +msgid "You are recommended to use CSS to specify page and link colors" msgstr "" "Il est recommandé d'utiliser les CSS pour spécifier la page et de liaison des " "couleurs\n" @@ -791,6 +791,10 @@ msgctxt "BAD_SUMMARY_HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5" msgstr "L'attribut summary sur l'élément du %s est obsolète dans HTML5" +msgctxt "XML_DECLARATION_DETECTED" +msgid "An XML declaration was detected. Did you mean to use input-xml?" 
+msgstr "" + #, c-format msgctxt "TRIM_EMPTY_ELEMENT" msgid "trimming empty %s" diff --git a/localize/translations/language_zh_cn.po b/localize/translations/language_zh_cn.po index 63b043e..0a97447 100644 --- a/localize/translations/language_zh_cn.po +++ b/localize/translations/language_zh_cn.po @@ -5,7 +5,7 @@ msgstr "" "Plural-Forms: nplurals=1; plural=0;\n" "X-Generator: HTML Tidy poconvert.rb\n" "Project-Id-Version: \n" -"PO-Revision-Date: 2017-03-15 17:35:16\n" +"PO-Revision-Date: 2017-03-19 13:23:38\n" "Last-Translator: jderry\n" "Language-Team: \n" @@ -270,7 +270,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE1" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL.\n" +"see http://www.w3.org/WAI/GL." msgstr "" #. This console output should be limited to 78 characters per line. @@ -278,7 +278,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE2" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" +"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/." msgstr "" #. This console output should be limited to 78 characters per line. @@ -316,7 +316,7 @@ msgstr "" #. This console output should be limited to 78 characters per line. msgctxt "TEXT_USING_BODY" -msgid "You are recommended to use CSS to specify page and link colors\n" +msgid "You are recommended to use CSS to specify page and link colors" msgstr "" #. This console output should be limited to 78 characters per line. @@ -648,6 +648,10 @@ msgctxt "BAD_SUMMARY_HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5" msgstr "" +msgctxt "XML_DECLARATION_DETECTED" +msgid "An XML declaration was detected. Did you mean to use input-xml?" 
+msgstr "" + #, c-format msgctxt "TRIM_EMPTY_ELEMENT" msgid "trimming empty %s" diff --git a/localize/translations/tidy.pot b/localize/translations/tidy.pot index 10696d2..0a29b58 100644 --- a/localize/translations/tidy.pot +++ b/localize/translations/tidy.pot @@ -5,7 +5,7 @@ msgstr "" "Plural-Forms: nplurals=2; plural=n != 1;\n" "X-Generator: HTML Tidy poconvert.rb\n" "Project-Id-Version: \n" -"POT-Creation-Date: 2017-03-15 17:35:16\n" +"POT-Creation-Date: 2017-03-19 13:23:38\n" "Last-Translator: jderry\n" "Language-Team: \n" @@ -272,7 +272,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE1" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL.\n" +"see http://www.w3.org/WAI/GL." msgstr "" #. This console output should be limited to 78 characters per line. @@ -280,7 +280,7 @@ msgstr "" msgctxt "TEXT_ACCESS_ADVICE2" msgid "" "For further advice on how to make your pages accessible\n" -"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/.\n" +"see http://www.w3.org/WAI/GL and http://www.html-tidy.org/accessibility/." msgstr "" #. This console output should be limited to 78 characters per line. @@ -318,7 +318,7 @@ msgstr "" #. This console output should be limited to 78 characters per line. msgctxt "TEXT_USING_BODY" -msgid "You are recommended to use CSS to specify page and link colors\n" +msgid "You are recommended to use CSS to specify page and link colors" msgstr "" #. This console output should be limited to 78 characters per line. @@ -650,6 +650,10 @@ msgctxt "BAD_SUMMARY_HTML5" msgid "The summary attribute on the %s element is obsolete in HTML5" msgstr "" +msgctxt "XML_DECLARATION_DETECTED" +msgid "An XML declaration was detected. Did you mean to use input-xml?" 
+msgstr "" + #, c-format msgctxt "TRIM_EMPTY_ELEMENT" msgid "trimming empty %s" diff --git a/src/language_en.h b/src/language_en.h index aac3a3c..50239d9 100644 --- a/src/language_en.h +++ b/src/language_en.h @@ -293,32 +293,31 @@ static languageDefinition language_en = { whichPluralForm_en, { /*************************************** ** Message Severity Level ***************************************/ - { TidyInfo, 0, "Info: " }, - { TidyWarning, 0, "Warning: " }, - { TidyConfig, 0, "Config: " }, - { TidyAccess, 0, "Access: " }, - { TidyError, 0, "Error: " }, - { TidyBadDocument, 0, "Document: " }, - { TidyFatal, 0, "Panic: " }, - { TidyDialogueInfo, 0, "Information: " }, - { TidyDialogueSummary, 0, "Summary: " }, - { TidyDialogueDoc, 0, "Document: " }, + { TidyInfo, 0, "Info: " }, + { TidyWarning, 0, "Warning: " }, + { TidyConfig, 0, "Config: " }, + { TidyAccess, 0, "Access: " }, + { TidyError, 0, "Error: " }, + { TidyBadDocument, 0, "Document: " }, + { TidyFatal, 0, "Panic: " }, + { TidyDialogueInfo, 0, "Information: " }, + { TidyDialogueSummary, 0, "Summary: " }, + { TidyDialogueDoc, 0, "Document: " }, /*************************************** ** Warnings and Errors ***************************************/ /* ReportEncodingWarning */ - { - ENCODING_MISMATCH, 0, "specified input encoding (%s) does not match actual input encoding (%s)" }, /* Warning */ + { ENCODING_MISMATCH, 0, "specified input encoding (%s) does not match actual input encoding (%s)" }, /* Warning */ /* ReportEncodingError */ - { VENDOR_SPECIFIC_CHARS, 0, "%s invalid character code %s" }, /* Error */ - { INVALID_SGML_CHARS, 0, "%s invalid character code %s" }, /* Error */ - { INVALID_UTF8, 0, "%s invalid UTF-8 bytes (char. code %s)" }, /* Error */ - { INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. 
code %s)" }, /* Error */ - { INVALID_NCR, 0, "%s invalid numeric character reference %s" }, /* Error */ - { BAD_SURROGATE_PAIR, 0, "Have out-of-range surrogate pair U+%04X:U+%04X, replaced with U+FFFD value."}, /* warning */ + { VENDOR_SPECIFIC_CHARS, 0, "%s invalid character code %s" }, /* Error */ + { INVALID_SGML_CHARS, 0, "%s invalid character code %s" }, /* Error */ + { INVALID_UTF8, 0, "%s invalid UTF-8 bytes (char. code %s)" }, /* Error */ + { INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" }, /* Error */ + { INVALID_NCR, 0, "%s invalid numeric character reference %s" }, /* Error */ + { BAD_SURROGATE_PAIR, 0, "Have out-of-range surrogate pair U+%04X:U+%04X, replaced with U+FFFD value." }, /* warning */ { BAD_SURROGATE_TAIL, 0, "Leading (High) surrogate pair U+%04X, with no trailing (Low) entity, replaced with U+FFFD." }, /* warning */ { BAD_SURROGATE_LEAD, 0, "Trailing (Low) surrogate pair U+%04X, with no leading (High) entity, replaced with U+FFFD." }, /* warning */ @@ -381,6 +380,7 @@ static languageDefinition language_en = { whichPluralForm_en, { { COERCE_TO_ENDTAG_WARN, 0, "<%s> is probably intended as " }, /* Warning */ { REMOVED_HTML5, 0, "%s element removed from HTML5" }, /* Warning */ { BAD_SUMMARY_HTML5, 0, "The summary attribute on the %s element is obsolete in HTML5" }, /* Warning */ + { XML_DECLARATION_DETECTED, 0, "An XML declaration was detected. Did you mean to use input-xml?" 
}, /* Warning */ /* ReportNotice */ { TRIM_EMPTY_ELEMENT, 0, "trimming empty %s" }, /* Notice */ diff --git a/src/lexer.c b/src/lexer.c index ea42eec..238fbfa 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -210,39 +210,41 @@ static struct _doctypes { uint score; uint vers; + uint vers_out; + Bool xhtml; ctmbstr name; ctmbstr fpi; ctmbstr si; } const W3C_Doctypes[] = { - { 2, HT20, "HTML 2.0", "-//IETF//DTD HTML 2.0//EN", NULL, }, - { 2, HT20, "HTML 2.0", "-//IETF//DTD HTML//EN", NULL, }, - { 2, HT20, "HTML 2.0", "-//W3C//DTD HTML 2.0//EN", NULL, }, - { 1, HT32, "HTML 3.2", "-//W3C//DTD HTML 3.2//EN", NULL, }, - { 1, HT32, "HTML 3.2", "-//W3C//DTD HTML 3.2 Final//EN", NULL, }, - { 1, HT32, "HTML 3.2", "-//W3C//DTD HTML 3.2 Draft//EN", NULL, }, - { 6, H40S, "HTML 4.0 Strict", "-//W3C//DTD HTML 4.0//EN", "http://www.w3.org/TR/REC-html40/strict.dtd" }, - { 8, H40T, "HTML 4.0 Transitional", "-//W3C//DTD HTML 4.0 Transitional//EN", "http://www.w3.org/TR/REC-html40/loose.dtd" }, - { 7, H40F, "HTML 4.0 Frameset", "-//W3C//DTD HTML 4.0 Frameset//EN", "http://www.w3.org/TR/REC-html40/frameset.dtd" }, - { 3, H41S, "HTML 4.01 Strict", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd" }, - { 5, H41T, "HTML 4.01 Transitional", "-//W3C//DTD HTML 4.01 Transitional//EN", "http://www.w3.org/TR/html4/loose.dtd" }, - { 4, H41F, "HTML 4.01 Frameset", "-//W3C//DTD HTML 4.01 Frameset//EN", "http://www.w3.org/TR/html4/frameset.dtd" }, - { 9, X10S, "XHTML 1.0 Strict", "-//W3C//DTD XHTML 1.0 Strict//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" }, - { 11, X10T, "XHTML 1.0 Transitional", "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" }, - { 10, X10F, "XHTML 1.0 Frameset", "-//W3C//DTD XHTML 1.0 Frameset//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd" }, - { 12, XH11, "XHTML 1.1", "-//W3C//DTD XHTML 1.1//EN", "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" }, - { 13, XB10, "XHTML Basic 1.0", 
"-//W3C//DTD XHTML Basic 1.0//EN", "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd" }, + { 2, HT20, 200, no, "HTML 2.0", "-//IETF//DTD HTML 2.0//EN", NULL, }, + { 2, HT20, 200, no, "HTML 2.0", "-//IETF//DTD HTML//EN", NULL, }, + { 2, HT20, 200, no, "HTML 2.0", "-//W3C//DTD HTML 2.0//EN", NULL, }, + { 1, HT32, 320, no, "HTML 3.2", "-//W3C//DTD HTML 3.2//EN", NULL, }, + { 1, HT32, 320, no, "HTML 3.2", "-//W3C//DTD HTML 3.2 Final//EN", NULL, }, + { 1, HT32, 320, no, "HTML 3.2", "-//W3C//DTD HTML 3.2 Draft//EN", NULL, }, + { 6, H40S, 400, no, "HTML 4.0 Strict", "-//W3C//DTD HTML 4.0//EN", "http://www.w3.org/TR/REC-html40/strict.dtd" }, + { 8, H40T, 400, no, "HTML 4.0 Transitional", "-//W3C//DTD HTML 4.0 Transitional//EN", "http://www.w3.org/TR/REC-html40/loose.dtd" }, + { 7, H40F, 400, no, "HTML 4.0 Frameset", "-//W3C//DTD HTML 4.0 Frameset//EN", "http://www.w3.org/TR/REC-html40/frameset.dtd" }, + { 3, H41S, 401, no, "HTML 4.01 Strict", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd" }, + { 5, H41T, 401, no, "HTML 4.01 Transitional", "-//W3C//DTD HTML 4.01 Transitional//EN", "http://www.w3.org/TR/html4/loose.dtd" }, + { 4, H41F, 401, no, "HTML 4.01 Frameset", "-//W3C//DTD HTML 4.01 Frameset//EN", "http://www.w3.org/TR/html4/frameset.dtd" }, + { 9, X10S, 100, yes, "XHTML 1.0 Strict", "-//W3C//DTD XHTML 1.0 Strict//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" }, + { 11, X10T, 100, yes, "XHTML 1.0 Transitional", "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" }, + { 10, X10F, 100, yes, "XHTML 1.0 Frameset", "-//W3C//DTD XHTML 1.0 Frameset//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd" }, + { 12, XH11, 110, yes, "XHTML 1.1", "-//W3C//DTD XHTML 1.1//EN", "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" }, + { 13, XB10, 100, yes, "XHTML Basic 1.0", "-//W3C//DTD XHTML Basic 1.0//EN", "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd" }, - { 20, HT50, "HTML5", NULL, NULL }, - 
{ 21, XH50, "XHTML5", NULL, NULL }, + { 20, HT50, 500, no, "HTML5", NULL, NULL }, + { 21, XH50, 500, yes, "XHTML5", NULL, NULL }, /* reminder to add XHTML Print 1.0 support, see http://www.w3.org/TR/xhtml-print */ #if 0 - { 14, XP10, "XHTML Print 1.0", "-//W3C//DTD XHTML-Print 1.0//EN", "http://www.w3.org/MarkUp/DTD/xhtml-print10.dtd" }, - { 14, XP10, "XHTML Print 1.0", "-//PWG//DTD XHTML-Print 1.0//EN", "http://www.xhtml-print.org/xhtml-print/xhtml-print10.dtd" }, + { 14, XP10, 100, yes, "XHTML Print 1.0", "-//W3C//DTD XHTML-Print 1.0//EN", "http://www.w3.org/MarkUp/DTD/xhtml-print10.dtd" }, + { 14, XP10, 100, yes, "XHTML Print 1.0", "-//PWG//DTD XHTML-Print 1.0//EN", "http://www.xhtml-print.org/xhtml-print/xhtml-print10.dtd" }, #endif /* final entry */ - { 0, 0, NULL, NULL, NULL } + { 0, 0, 0, no, NULL, NULL, NULL } }; int TY_(HTMLVersion)(TidyDocImpl* doc) @@ -1930,16 +1932,20 @@ uint TY_(ApparentVersion)( TidyDocImpl* doc ) ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool ARG_UNUSED(isXhtml) ) { ctmbstr name = GetNameFromVers(vers); - - /* this test has moved to ReportMarkupVersion() in localize.c, for localization reasons */ - /* - if (!name) - name = "HTML Proprietary"; - */ - return name; } +uint TY_(HTMLVersionNumberFromCode)( uint vers ) +{ + uint i; + + for (i = 0; W3C_Doctypes[i].name; ++i) + if (W3C_Doctypes[i].vers == vers) + return W3C_Doctypes[i].vers_out; + + return VERS_UNKNOWN; +} + Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc ) { Bool isXhtml = doc->lexer->isvoyager; diff --git a/src/lexer.h b/src/lexer.h index 0c8d5bb..1d3d9cd 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -498,6 +498,8 @@ uint TY_(ApparentVersion)( TidyDocImpl* doc ); ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool isXhtml ); +uint TY_(HTMLVersionNumberFromCode)( uint vers ); + Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc ); Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc ); diff --git a/src/message.c b/src/message.c index c9dbe35..0ce2d54 100755 
--- a/src/message.c +++ b/src/message.c @@ -267,6 +267,9 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code) case COERCE_TO_ENDTAG_WARN: message = TY_(tidyMessageCreateWithNode)(doc, rpt, code, TidyWarning, node->element, node->element ); break; + case XML_DECLARATION_DETECTED: + message = TY_(tidyMessageCreateWithNode)(doc, node, code, TidyWarning ); + break; } messageOut( message ); diff --git a/src/parser.c b/src/parser.c index 6ff2388..dd9d11e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -4752,13 +4752,15 @@ void TY_(ParseDocument)(TidyDocImpl* doc) { if (node->type == XmlDecl) { + doc->xmlDetected = yes; + if (TY_(FindXmlDecl)(doc) && doc->root.content) { TY_(ReportError)(doc, &doc->root, node, DISCARDING_UNEXPECTED); TY_(FreeNode)(doc, node); continue; } - if (node->line != 1 || (node->line == 1 && node->column != 1)) + if (node->line > 1 || node->column != 1) { TY_(ReportError)(doc, &doc->root, node, SPACE_PRECEDING_XMLDECL); } @@ -5015,6 +5017,8 @@ void TY_(ParseXMLDocument)(TidyDocImpl* doc) TY_(SetOptionBool)( doc, TidyXmlTags, yes ); + doc->xmlDetected = yes; + while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) { /* discard unexpected end tags */ diff --git a/src/tidy-int.h b/src/tidy-int.h index 78c810c..7973c74 100755 --- a/src/tidy-int.h +++ b/src/tidy-int.h @@ -76,7 +76,8 @@ struct _TidyDocImpl uint badChars; /* for bad char encodings */ uint badForm; /* bit field, for badly placed form tags, or other format errors */ - Bool HTML5Mode; /* current mode is html5 */ + Bool HTML5Mode; /* current mode is html5 */ + Bool xmlDetected; /* true if XML was used/detected */ /* Memory allocator */ TidyAllocator* allocator; diff --git a/src/tidylib.c b/src/tidylib.c index 06e3594..60cbd03 100755 --- a/src/tidylib.c +++ b/src/tidylib.c @@ -955,18 +955,19 @@ int TIDY_CALL tidyStatus( TidyDoc tdoc ) } int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) ) { -/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ - 
return 0; + TidyDocImpl* impl = tidyDocToImpl( tdoc ); + return TY_(HTMLVersionNumberFromCode)( impl->lexer->versionEmitted ); } + Bool TIDY_CALL tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) ) { -/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ - return no; + TidyDocImpl* impl = tidyDocToImpl( tdoc ); + return impl->lexer->isvoyager; } Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) ) { -/* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ - return no; + TidyDocImpl* impl = tidyDocToImpl( tdoc ); + return impl->xmlDetected; } uint TIDY_CALL tidyErrorCount( TidyDoc tdoc ) @@ -1400,6 +1401,7 @@ int TY_(DocParseStream)( TidyDocImpl* doc, StreamIn* in ) doc->root.line = doc->lexer->lines; doc->root.column = doc->lexer->columns; doc->inputHadBOM = no; + doc->xmlDetected = no; bomEnc = TY_(ReadBOMEncoding)(in); @@ -2095,6 +2097,12 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc ) if (doc->lexer->versionEmitted & VERS_HTML5) TY_(CheckHTML5)( doc, &doc->root ); TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root ); + + if ( !doc->lexer->isvoyager && doc->xmlDetected ) + { + TY_(ReportWarning)(doc, NULL, TY_(FindXmlDecl)(doc), XML_DECLARATION_DETECTED ); + + } } #if !defined(NDEBUG) && defined(_MSC_VER)