Added --doctype=html5 option value. Fixes #17.

This commit is contained in:
Michael[tm] Smith 2012-03-15 14:11:01 +09:00
parent 5c4d2e5b33
commit 0c8b587067
6 changed files with 21 additions and 8 deletions

View file

@ -241,6 +241,7 @@ typedef enum
*/
typedef enum
{
TidyDoctypeHtml5, /**< <!DOCTYPE html> */
TidyDoctypeOmit, /**< Omit DOCTYPE altogether */
TidyDoctypeAuto, /**< Keep DOCTYPE in input. Set version to content */
TidyDoctypeStrict, /**< Convert document to HTML 4 strict content model */

View file

@ -908,11 +908,11 @@
</tr>
<tr>
<td valign="top">Type: <strong>DocType</strong><br />
Default: <strong>auto</strong><br />Example: <strong>omit, auto, strict, transitional, user</strong></td>
Default: <strong>auto</strong><br />Example: <strong>html5, omit, auto, strict, transitional, user</strong></td>
<td align="right" valign="top"></td>
</tr>
<tr>
<td colspan="2">This option specifies the DOCTYPE declaration generated by Tidy. If set to "omit" the output won't contain a DOCTYPE declaration. If set to "auto" (the default) Tidy will use an educated guess based upon the contents of the document. If set to "strict", Tidy will set the DOCTYPE to the strict DTD. If set to "loose", the DOCTYPE is set to the loose (transitional) DTD. Alternatively, you can supply a string for the formal public identifier (FPI).<br /><br />For example: <br />doctype: "-//ACME//DTD HTML 3.14159//EN"<br /><br />If you specify the FPI for an XHTML document, Tidy will set the system identifier to an empty string. For an HTML document, Tidy adds a system identifier only if one was already present in order to preserve the processing mode of some browsers. Tidy leaves the DOCTYPE for generic XML documents unchanged. <code>--doctype omit</code> implies <code>--numeric-entities yes</code>. This option does not offer a validation of the document conformance. </td>
<td colspan="2">This option specifies the DOCTYPE declaration generated by Tidy. If set to "omit", the output won't contain a DOCTYPE declaration. If set to "html5", the DOCTYPE is set to "&lt;!DOCTYPE html&gt;". If set to "auto" (the default), Tidy will use an educated guess based upon the contents of the document. If set to "strict", Tidy will set the DOCTYPE to the strict DTD. If set to "loose", the DOCTYPE is set to the loose (transitional) DTD. Alternatively, you can supply a string for the formal public identifier (FPI).<br /><br />For example: <br />doctype: "-//ACME//DTD HTML 3.14159//EN"<br /><br />If you specify the FPI for an XHTML document, Tidy will set the system identifier to an empty string. For an HTML document, Tidy adds a system identifier only if one was already present in order to preserve the processing mode of some browsers. Tidy leaves the DOCTYPE for generic XML documents unchanged. <code>--doctype omit</code> implies <code>--numeric-entities yes</code>. This option does not validate the document's conformance. </td>
</tr>
<tr>
<td>&#160;</td>

View file

@ -136,6 +136,7 @@ static const ctmbstr newlinePicks[] =
static const ctmbstr doctypePicks[] =
{
"html5",
"omit",
"auto",
"strict",
@ -206,7 +207,7 @@ static ParseProperty ParseSorter;
static ParseProperty ParseCharEnc;
static ParseProperty ParseNewline;
/* omit | auto | strict | loose | <fpi> */
/* html5 | omit | auto | strict | loose | <fpi> */
static ParseProperty ParseDocType;
/* keep-first or keep-last? */
@ -1432,7 +1433,7 @@ ctmbstr TY_(CharEncodingOptName)( int encoding )
}
/*
doctype: omit | auto | strict | loose | <fpi>
doctype: html5 | omit | auto | strict | loose | <fpi>
where the fpi is a string similar to
@ -1469,6 +1470,8 @@ Bool ParseDocType( TidyDocImpl* doc, const TidyOptionImpl* option )
if ( TY_(tmbstrcasecmp)(buf, "auto") == 0 )
dtmode = TidyDoctypeAuto;
else if ( TY_(tmbstrcasecmp)(buf, "html5") == 0 )
dtmode = TidyDoctypeHtml5;
else if ( TY_(tmbstrcasecmp)(buf, "omit") == 0 )
dtmode = TidyDoctypeOmit;
else if ( TY_(tmbstrcasecmp)(buf, "strict") == 0 )

View file

@ -1551,8 +1551,6 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
TidyDoctypeModes dtmode = (TidyDoctypeModes)cfg(doc, TidyDoctypeMode);
ctmbstr pub = "PUBLIC";
ctmbstr sys = "SYSTEM";
Bool xhtml = (cfgBool(doc, TidyXmlOut) || doc->lexer->isvoyager) &&
!cfgBool(doc, TidyHtmlOut);
lexer->versionEmitted = TY_(ApparentVersion)( doc );
@ -1578,6 +1576,13 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
switch(dtmode)
{
case TidyDoctypeHtml5:
/* HTML5 */
TY_(RepairAttrValue)(doc, doctype, pub, NULL);
TY_(RepairAttrValue)(doc, doctype, sys, NULL);
lexer->versionEmitted = XH50;
break;
case TidyDoctypeStrict:
/* XHTML 1.0 Strict */
TY_(RepairAttrValue)(doc, doctype, pub, GetFPIFromVers(X10S));
@ -1596,7 +1601,7 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
TY_(RepairAttrValue)(doc, doctype, sys, "");
break;
case TidyDoctypeAuto:
if (xhtml && lexer->doctype == VERS_UNKNOWN) {
if (lexer->doctype == VERS_UNKNOWN) {
lexer->versionEmitted = XH50;
return yes;
}
@ -1694,6 +1699,9 @@ Bool TY_(FixDocType)( TidyDocImpl* doc )
switch (dtmode)
{
case TidyDoctypeHtml5:
guessed = HT50;
break;
case TidyDoctypeStrict:
guessed = H41S;
break;

View file

@ -408,6 +408,7 @@ static const TidyOptionDoc option_docs[] =
{TidyDoctype,
"This option specifies the DOCTYPE declaration generated by Tidy. If set "
"to \"omit\" the output won't contain a DOCTYPE declaration. If set to "
"\"html5\" the DOCTYPE is set to \"&lt;!DOCTYPE html>\". If set to "
"\"auto\" (the default) Tidy will use an educated guess based upon the "
"contents of the document. If set to \"strict\", Tidy will set the DOCTYPE "
"to the strict DTD. If set to \"loose\", the DOCTYPE is set to the loose "

View file

@ -1 +1 @@
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/bf1c2f6";
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/5c4d2e5";