Let's actually commit the -gdoc feature this time.

This commit is contained in:
Michael[tm] Smith 2012-06-20 16:55:42 +09:00
parent 45fce5e3c2
commit a772bbb17f
13 changed files with 85 additions and 5 deletions

5
.gitignore vendored
View file

@ -12,6 +12,11 @@
/htmldoc/tidy.1 /htmldoc/tidy.1
/htmldoc/quickref.html /htmldoc/quickref.html
/lib/ /lib/
/autom4te.cache/
/console/.deps/
/console/.libs/
/src/.deps/
/src/.libs/
*.user *.user
*.suo *.suo
*.sdf *.sdf

View file

@ -145,7 +145,7 @@ OBJFILES=\
$(OBJDIR)/attrask$(OBJSUF) $(OBJDIR)/attrdict$(OBJSUF) $(OBJDIR)/attrget$(OBJSUF) \ $(OBJDIR)/attrask$(OBJSUF) $(OBJDIR)/attrdict$(OBJSUF) $(OBJDIR)/attrget$(OBJSUF) \
$(OBJDIR)/buffio$(OBJSUF) $(OBJDIR)/fileio$(OBJSUF) $(OBJDIR)/streamio$(OBJSUF) \ $(OBJDIR)/buffio$(OBJSUF) $(OBJDIR)/fileio$(OBJSUF) $(OBJDIR)/streamio$(OBJSUF) \
$(OBJDIR)/tagask$(OBJSUF) $(OBJDIR)/tmbstr$(OBJSUF) $(OBJDIR)/utf8$(OBJSUF) \ $(OBJDIR)/tagask$(OBJSUF) $(OBJDIR)/tmbstr$(OBJSUF) $(OBJDIR)/utf8$(OBJSUF) \
$(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF) $(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF) $(OBJDIR)/gdoc$(OBJSUF)
CFILES= \ CFILES= \
$(SRCDIR)/access.c $(SRCDIR)/attrs.c $(SRCDIR)/istack.c \ $(SRCDIR)/access.c $(SRCDIR)/attrs.c $(SRCDIR)/istack.c \
@ -155,7 +155,7 @@ CFILES= \
$(SRCDIR)/attrask.c $(SRCDIR)/attrdict.c $(SRCDIR)/attrget.c \ $(SRCDIR)/attrask.c $(SRCDIR)/attrdict.c $(SRCDIR)/attrget.c \
$(SRCDIR)/buffio.c $(SRCDIR)/fileio.c $(SRCDIR)/streamio.c \ $(SRCDIR)/buffio.c $(SRCDIR)/fileio.c $(SRCDIR)/streamio.c \
$(SRCDIR)/tagask.c $(SRCDIR)/tmbstr.c $(SRCDIR)/utf8.c \ $(SRCDIR)/tagask.c $(SRCDIR)/tmbstr.c $(SRCDIR)/utf8.c \
$(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c $(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c $(SRCDIR)/gdoc.c
HFILES= $(INCDIR)/platform.h $(INCDIR)/tidy.h $(INCDIR)/tidyenum.h \ HFILES= $(INCDIR)/platform.h $(INCDIR)/tidy.h $(INCDIR)/tidyenum.h \
$(INCDIR)/buffio.h $(INCDIR)/buffio.h
@ -167,7 +167,7 @@ LIBHFILES= \
$(SRCDIR)/mappedio.h $(SRCDIR)/message.h $(SRCDIR)/parser.h \ $(SRCDIR)/mappedio.h $(SRCDIR)/message.h $(SRCDIR)/parser.h \
$(SRCDIR)/pprint.h $(SRCDIR)/streamio.h $(SRCDIR)/tags.h \ $(SRCDIR)/pprint.h $(SRCDIR)/streamio.h $(SRCDIR)/tags.h \
$(SRCDIR)/tmbstr.h $(SRCDIR)/utf8.h $(SRCDIR)/tidy-int.h \ $(SRCDIR)/tmbstr.h $(SRCDIR)/utf8.h $(SRCDIR)/tidy-int.h \
$(SRCDIR)/version.h $(SRCDIR)/gdoc.h $(SRCDIR)/version.h

View file

@ -181,6 +181,9 @@ static const CmdOptDesc cmdopt_defs[] = {
{ "-bare", { "-bare",
"strip out smart quotes and em dashes, etc.", "strip out smart quotes and em dashes, etc.",
"bare: yes", CmdOptProcDir, "-b" }, "bare: yes", CmdOptProcDir, "-b" },
{ "-gdoc",
"produce clean version of html exported by google docs",
"gdoc: yes", CmdOptProcDir, "-g" },
{ "-numeric", { "-numeric",
"output numeric rather than named entities", "output numeric rather than named entities",
"numeric-entities: yes", CmdOptProcDir, "-n" }, "numeric-entities: yes", CmdOptProcDir, "-n" },
@ -1010,6 +1013,9 @@ int main( int argc, char** argv )
else if ( strcasecmp(arg, "clean") == 0 ) else if ( strcasecmp(arg, "clean") == 0 )
tidyOptSetBool( tdoc, TidyMakeClean, yes ); tidyOptSetBool( tdoc, TidyMakeClean, yes );
else if ( strcasecmp(arg, "gdoc") == 0 )
tidyOptSetBool( tdoc, TidyGDocClean, yes );
else if ( strcasecmp(arg, "bare") == 0 ) else if ( strcasecmp(arg, "bare") == 0 )
tidyOptSetBool( tdoc, TidyMakeBare, yes ); tidyOptSetBool( tdoc, TidyMakeBare, yes );
@ -1227,6 +1233,10 @@ int main( int argc, char** argv )
tidyOptSetBool( tdoc, TidyMakeClean, yes ); tidyOptSetBool( tdoc, TidyMakeClean, yes );
break; break;
case 'g':
tidyOptSetBool( tdoc, TidyGDocClean, yes );
break;
case 'b': case 'b':
tidyOptSetBool( tdoc, TidyMakeBare, yes ); tidyOptSetBool( tdoc, TidyMakeBare, yes );
break; break;

View file

@ -126,6 +126,7 @@ typedef enum
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */ TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */ TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
TidyMakeClean, /**< Replace presentational clutter by style rules */ TidyMakeClean, /**< Replace presentational clutter by style rules */
TidyGDocClean, /**< Clean up HTML exported from Google Docs */
TidyLogicalEmphasis, /**< Replace i by em and b by strong */ TidyLogicalEmphasis, /**< Replace i by em and b by strong */
TidyDropPropAttrs, /**< Discard proprietary attributes */ TidyDropPropAttrs, /**< Discard proprietary attributes */
TidyDropFontTags, /**< Discard presentation tags */ TidyDropFontTags, /**< Discard presentation tags */

View file

@ -8,7 +8,7 @@
<body> <body>
<h1 id="top">Quick Reference</h1> <h1 id="top">Quick Reference</h1>
<h2>HTML Tidy Configuration Options</h2> <h2>HTML Tidy Configuration Options</h2>
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/d193420">https://github.com/w3c/tidy-html5/tree/d193420</a></p> <p>Version: <a href="https://github.com/w3c/tidy-html5/tree/f212c3f">https://github.com/w3c/tidy-html5/tree/f212c3f</a></p>
<p> <p>
<a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a> <a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a>
<br /> <br />
@ -188,6 +188,13 @@
<td>Boolean</td> <td>Boolean</td>
<td>yes</td> <td>yes</td>
</tr> </tr>
<tr>
<td>
<a href="#gdoc">gdoc</a>
</td>
<td>Boolean</td>
<td>no</td>
</tr>
<tr> <tr>
<td> <td>
<a href="#hide-comments">hide-comments</a> <a href="#hide-comments">hide-comments</a>
@ -1134,6 +1141,25 @@
<tr> <tr>
<td>&#160;</td> <td>&#160;</td>
</tr> </tr>
<tr>
<td class="tabletitle" valign="top" id="gdoc">gdoc</td>
<td class="tabletitlelink" valign="top" align="right">
<a href="#top">Top</a>
</td>
</tr>
<tr>
<td valign="top">Type: <strong>Boolean</strong><br />
Default: <strong>no</strong><br />Example: <strong>y/n, yes/no, t/f, true/false, 1/0</strong></td>
<td align="right" valign="top">
<a href="#drop-font-tags">drop-font-tags</a>
</td>
</tr>
<tr>
<td colspan="2">This option specifies if Tidy should enable specific behavior for cleaning up HTML exported from Google Docs. </td>
</tr>
<tr>
<td>&#160;</td>
</tr>
<tr> <tr>
<td class="tabletitle" valign="top" id="hide-comments">hide-comments</td> <td class="tabletitle" valign="top" id="hide-comments">hide-comments</td>
<td class="tabletitlelink" valign="top" align="right"> <td class="tabletitlelink" valign="top" align="right">

View file

@ -751,6 +751,27 @@ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
return attr; return attr;
} }
/* Unlink and free the first attribute of `node` whose name matches `name`.
**
** A match is an attribute with a non-NULL name for which TY_(tmbstrcmp)
** returns 0. At most one attribute is removed; if nothing matches, the
** attribute list is left untouched.
*/
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name )
{
    /* `link` always points at the pointer that refers to the current
    ** attribute: the list head first, then each predecessor's `next`. */
    AttVal **link = &node->attributes;

    while ( *link != NULL )
    {
        AttVal *candidate = *link;

        if ( candidate->attribute &&
             TY_(tmbstrcmp)(candidate->attribute, name) == 0 )
        {
            /* Splice the attribute out of the list before releasing it. */
            *link = candidate->next;
            TY_(FreeAttribute)( doc, candidate );
            return;
        }

        link = &candidate->next;
    }
}
AttVal* TY_(AddAttribute)( TidyDocImpl* doc, AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value ) Node *node, ctmbstr name, ctmbstr value )
{ {

View file

@ -87,6 +87,8 @@ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name ); AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
AttVal* TY_(AddAttribute)( TidyDocImpl* doc, AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value ); Node *node, ctmbstr name, ctmbstr value );

View file

@ -252,6 +252,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks }, { TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
{ TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks }, { TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
{ TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks }, { TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
{ TidyGDocClean, MU, "gdoc", BL, no, ParseBool, boolPicks },
{ TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks }, { TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
{ TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks }, { TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
{ TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks }, { TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },

View file

@ -359,6 +359,8 @@ static const TidyOptionId TidyDropFontTagsLinks[] =
{ TidyMakeClean, TidyUnknownOption }; { TidyMakeClean, TidyUnknownOption };
static const TidyOptionId TidyMakeCleanTagsLinks[] = static const TidyOptionId TidyMakeCleanTagsLinks[] =
{ TidyDropFontTags, TidyUnknownOption }; { TidyDropFontTags, TidyUnknownOption };
/* Option id list named for TidyGDocClean; terminated by TidyUnknownOption
** like the neighbouring *Links tables.
** NOTE(review): presumably meant as the cross-reference list for the "gdoc"
** option documentation - confirm it is actually referenced by option_docs.
*/
static const TidyOptionId TidyGDocCleanLinks[] =
{
    TidyMakeClean,
    TidyUnknownOption
};
/* Documentation of options */ /* Documentation of options */
static const TidyOptionDoc option_docs[] = static const TidyOptionDoc option_docs[] =
@ -405,6 +407,12 @@ static const TidyOptionDoc option_docs[] =
"on the HTML saved by Microsoft Office products. " "on the HTML saved by Microsoft Office products. "
, TidyMakeCleanTagsLinks , TidyMakeCleanTagsLinks
}, },
{TidyGDocClean,
/* Adjacent string literals are concatenated verbatim, so every fragment
** except the last must carry its own trailing space. */
"This option specifies if Tidy "
"should enable specific behavior for cleaning up HTML exported from "
"Google Docs. "
/* NOTE(review): this entry reuses TidyMakeCleanTagsLinks although a
** TidyGDocCleanLinks table is defined in this file - confirm which
** cross-reference list is intended. */
, TidyMakeCleanTagsLinks
},
{TidyDoctype, {TidyDoctype,
"This option specifies the DOCTYPE declaration generated by Tidy.<br />" "This option specifies the DOCTYPE declaration generated by Tidy.<br />"
"If set to \"omit\" the output won't contain a DOCTYPE declaration.<br />" "If set to \"omit\" the output won't contain a DOCTYPE declaration.<br />"

0
src/mappedio.c Executable file → Normal file
View file

0
src/mappedio.h Executable file → Normal file
View file

View file

@ -29,6 +29,7 @@
#include "tidy-int.h" #include "tidy-int.h"
#include "parser.h" #include "parser.h"
#include "clean.h" #include "clean.h"
#include "gdoc.h"
#include "config.h" #include "config.h"
#include "message.h" #include "message.h"
#include "pprint.h" #include "pprint.h"
@ -1238,6 +1239,7 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
Bool word2K = cfgBool( doc, TidyWord2000 ); Bool word2K = cfgBool( doc, TidyWord2000 );
Bool logical = cfgBool( doc, TidyLogicalEmphasis ); Bool logical = cfgBool( doc, TidyLogicalEmphasis );
Bool clean = cfgBool( doc, TidyMakeClean ); Bool clean = cfgBool( doc, TidyMakeClean );
Bool gdoc = cfgBool( doc, TidyGDocClean );
Bool dropFont = cfgBool( doc, TidyDropFontTags ); Bool dropFont = cfgBool( doc, TidyDropFontTags );
Bool htmlOut = cfgBool( doc, TidyHtmlOut ); Bool htmlOut = cfgBool( doc, TidyHtmlOut );
Bool xmlOut = cfgBool( doc, TidyXmlOut ); Bool xmlOut = cfgBool( doc, TidyXmlOut );
@ -1278,6 +1280,10 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
if ( clean || dropFont ) if ( clean || dropFont )
TY_(CleanDocument)( doc ); TY_(CleanDocument)( doc );
/* clean up html exported by Google Docs */
if ( gdoc )
TY_(CleanGoogleDocument)( doc );
/* Move terminating <br /> tags from out of paragraphs */ /* Move terminating <br /> tags from out of paragraphs */
/*! Do we want to do this for all block-level elements? */ /*! Do we want to do this for all block-level elements? */

View file

@ -1 +1 @@
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/8025154"; static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/45fce5e";