diff --git a/include/tidyenum.h b/include/tidyenum.h index 1cef305..a910ec7 100644 --- a/include/tidyenum.h +++ b/include/tidyenum.h @@ -241,6 +241,7 @@ typedef enum */ typedef enum { + TidyDoctypeHtml5, /**< Set DOCTYPE to the HTML5 doctype */ TidyDoctypeOmit, /**< Omit DOCTYPE altogether */ TidyDoctypeAuto, /**< Keep DOCTYPE in input. Set version to content */ TidyDoctypeStrict, /**< Convert document to HTML 4 strict content model */ diff --git a/quickref.html b/quickref.html index 2239f55..4c51584 100644 --- a/quickref.html +++ b/quickref.html @@ -908,11 +908,11 @@ Type: DocType
- Default: auto
Example: omit, auto, strict, transitional, user + Default: auto
Example: html5, omit, auto, strict, transitional, user - This option specifies the DOCTYPE declaration generated by Tidy. If set to "omit" the output won't contain a DOCTYPE declaration. If set to "auto" (the default) Tidy will use an educated guess based upon the contents of the document. If set to "strict", Tidy will set the DOCTYPE to the strict DTD. If set to "loose", the DOCTYPE is set to the loose (transitional) DTD. Alternatively, you can supply a string for the formal public identifier (FPI).

For example:
doctype: "-//ACME//DTD HTML 3.14159//EN"

If you specify the FPI for an XHTML document, Tidy will set the system identifier to an empty string. For an HTML document, Tidy adds a system identifier only if one was already present in order to preserve the processing mode of some browsers. Tidy leaves the DOCTYPE for generic XML documents unchanged. --doctype omit implies --numeric-entities yes. This option does not offer a validation of the document conformance. + This option specifies the DOCTYPE declaration generated by Tidy. If set to "omit" the output won't contain a DOCTYPE declaration. If set to "html5" the DOCTYPE is set to "<!DOCTYPE html>". If set to "auto" (the default) Tidy will use an educated guess based upon the contents of the document. If set to "strict", Tidy will set the DOCTYPE to the strict DTD. If set to "loose", the DOCTYPE is set to the loose (transitional) DTD. Alternatively, you can supply a string for the formal public identifier (FPI).

For example:
doctype: "-//ACME//DTD HTML 3.14159//EN"

If you specify the FPI for an XHTML document, Tidy will set the system identifier to an empty string. For an HTML document, Tidy adds a system identifier only if one was already present in order to preserve the processing mode of some browsers. Tidy leaves the DOCTYPE for generic XML documents unchanged. --doctype omit implies --numeric-entities yes. This option does not offer a validation of the document conformance.   diff --git a/src/config.c b/src/config.c index 95c3394..b48d910 100644 --- a/src/config.c +++ b/src/config.c @@ -136,6 +136,7 @@ static const ctmbstr newlinePicks[] = static const ctmbstr doctypePicks[] = { + "html5", "omit", "auto", "strict", @@ -206,7 +207,7 @@ static ParseProperty ParseSorter; static ParseProperty ParseCharEnc; static ParseProperty ParseNewline; -/* omit | auto | strict | loose | <fpi> */ +/* html5 | omit | auto | strict | loose | <fpi> */ static ParseProperty ParseDocType; /* keep-first or keep-last? */ @@ -1432,7 +1433,7 @@ ctmbstr TY_(CharEncodingOptName)( int encoding ) } /* - doctype: omit | auto | strict | loose | <fpi> + doctype: html5 | omit | auto | strict | loose | <fpi> where the fpi is a string similar to @@ -1469,6 +1470,8 @@ Bool ParseDocType( TidyDocImpl* doc, const TidyOptionImpl* option ) if ( TY_(tmbstrcasecmp)(buf, "auto") == 0 ) dtmode = TidyDoctypeAuto; + else if ( TY_(tmbstrcasecmp)(buf, "html5") == 0 ) + dtmode = TidyDoctypeHtml5; else if ( TY_(tmbstrcasecmp)(buf, "omit") == 0 ) dtmode = TidyDoctypeOmit; else if ( TY_(tmbstrcasecmp)(buf, "strict") == 0 ) diff --git a/src/lexer.c b/src/lexer.c index 36b9794..3a97244 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1551,8 +1551,6 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc ) TidyDoctypeModes dtmode = (TidyDoctypeModes)cfg(doc, TidyDoctypeMode); ctmbstr pub = "PUBLIC"; ctmbstr sys = "SYSTEM"; - Bool xhtml = (cfgBool(doc, TidyXmlOut) || doc->lexer->isvoyager) && - !cfgBool(doc, TidyHtmlOut); lexer->versionEmitted = TY_(ApparentVersion)( doc ); @@ -1578,6 +1576,13 @@ Bool 
TY_(SetXHTMLDocType)( TidyDocImpl* doc ) switch(dtmode) { + + case TidyDoctypeHtml5: + /* HTML5 */ + TY_(RepairAttrValue)(doc, doctype, pub, NULL); + TY_(RepairAttrValue)(doc, doctype, sys, NULL); + lexer->versionEmitted = XH50; + break; case TidyDoctypeStrict: /* XHTML 1.0 Strict */ TY_(RepairAttrValue)(doc, doctype, pub, GetFPIFromVers(X10S)); @@ -1596,7 +1601,7 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc ) TY_(RepairAttrValue)(doc, doctype, sys, ""); break; case TidyDoctypeAuto: - if (xhtml && lexer->doctype == VERS_UNKNOWN) { + if (lexer->doctype == VERS_UNKNOWN) { lexer->versionEmitted = XH50; return yes; } @@ -1694,6 +1699,9 @@ Bool TY_(FixDocType)( TidyDocImpl* doc ) switch (dtmode) { + case TidyDoctypeHtml5: + guessed = HT50; + break; case TidyDoctypeStrict: guessed = H41S; break; diff --git a/src/localize.c b/src/localize.c index b47870a..43d4359 100644 --- a/src/localize.c +++ b/src/localize.c @@ -408,6 +408,7 @@ static const TidyOptionDoc option_docs[] = {TidyDoctype, "This option specifies the DOCTYPE declaration generated by Tidy. If set " "to \"omit\" the output won't contain a DOCTYPE declaration. If set to " + "\"html5\" the DOCTYPE is set to \"<!DOCTYPE html>\". If set to " "\"auto\" (the default) Tidy will use an educated guess based upon the " "contents of the document. If set to \"strict\", Tidy will set the DOCTYPE " "to the strict DTD. If set to \"loose\", the DOCTYPE is set to the loose " diff --git a/src/version.h b/src/version.h index ac18199..76b7446 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/bf1c2f6"; \ No newline at end of file +static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/5c4d2e5"; \ No newline at end of file