Let's actually commit the -gdoc feature this time.

This commit is contained in:
Michael[tm] Smith 2012-06-20 16:55:42 +09:00
parent 45fce5e3c2
commit a772bbb17f
13 changed files with 85 additions and 5 deletions

5
.gitignore vendored
View file

@ -12,6 +12,11 @@
/htmldoc/tidy.1
/htmldoc/quickref.html
/lib/
/autom4te.cache/
/console/.deps/
/console/.libs/
/src/.deps/
/src/.libs/
*.user
*.suo
*.sdf

View file

@ -145,7 +145,7 @@ OBJFILES=\
$(OBJDIR)/attrask$(OBJSUF) $(OBJDIR)/attrdict$(OBJSUF) $(OBJDIR)/attrget$(OBJSUF) \
$(OBJDIR)/buffio$(OBJSUF) $(OBJDIR)/fileio$(OBJSUF) $(OBJDIR)/streamio$(OBJSUF) \
$(OBJDIR)/tagask$(OBJSUF) $(OBJDIR)/tmbstr$(OBJSUF) $(OBJDIR)/utf8$(OBJSUF) \
$(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF)
$(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF) $(OBJDIR)/gdoc$(OBJSUF)
CFILES= \
$(SRCDIR)/access.c $(SRCDIR)/attrs.c $(SRCDIR)/istack.c \
@ -155,7 +155,7 @@ CFILES= \
$(SRCDIR)/attrask.c $(SRCDIR)/attrdict.c $(SRCDIR)/attrget.c \
$(SRCDIR)/buffio.c $(SRCDIR)/fileio.c $(SRCDIR)/streamio.c \
$(SRCDIR)/tagask.c $(SRCDIR)/tmbstr.c $(SRCDIR)/utf8.c \
$(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c
$(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c $(SRCDIR)/gdoc.c
HFILES= $(INCDIR)/platform.h $(INCDIR)/tidy.h $(INCDIR)/tidyenum.h \
$(INCDIR)/buffio.h
@ -167,7 +167,7 @@ LIBHFILES= \
$(SRCDIR)/mappedio.h $(SRCDIR)/message.h $(SRCDIR)/parser.h \
$(SRCDIR)/pprint.h $(SRCDIR)/streamio.h $(SRCDIR)/tags.h \
$(SRCDIR)/tmbstr.h $(SRCDIR)/utf8.h $(SRCDIR)/tidy-int.h \
$(SRCDIR)/version.h
$(SRCDIR)/gdoc.h $(SRCDIR)/version.h

View file

@ -181,6 +181,9 @@ static const CmdOptDesc cmdopt_defs[] = {
{ "-bare",
"strip out smart quotes and em dashes, etc.",
"bare: yes", CmdOptProcDir, "-b" },
{ "-gdoc",
"produce clean version of html exported by google docs",
"gdoc: yes", CmdOptProcDir, "-g" },
{ "-numeric",
"output numeric rather than named entities",
"numeric-entities: yes", CmdOptProcDir, "-n" },
@ -1010,6 +1013,9 @@ int main( int argc, char** argv )
else if ( strcasecmp(arg, "clean") == 0 )
tidyOptSetBool( tdoc, TidyMakeClean, yes );
else if ( strcasecmp(arg, "gdoc") == 0 )
tidyOptSetBool( tdoc, TidyGDocClean, yes );
else if ( strcasecmp(arg, "bare") == 0 )
tidyOptSetBool( tdoc, TidyMakeBare, yes );
@ -1227,6 +1233,10 @@ int main( int argc, char** argv )
tidyOptSetBool( tdoc, TidyMakeClean, yes );
break;
case 'g':
tidyOptSetBool( tdoc, TidyGDocClean, yes );
break;
case 'b':
tidyOptSetBool( tdoc, TidyMakeBare, yes );
break;

View file

@ -126,6 +126,7 @@ typedef enum
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
TidyMakeClean, /**< Replace presentational clutter by style rules */
TidyGDocClean, /**< Clean up HTML exported from Google Docs */
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
TidyDropPropAttrs, /**< Discard proprietary attributes */
TidyDropFontTags, /**< Discard presentation tags */

View file

@ -8,7 +8,7 @@
<body>
<h1 id="top">Quick Reference</h1>
<h2>HTML Tidy Configuration Options</h2>
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/d193420">https://github.com/w3c/tidy-html5/tree/d193420</a></p>
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/f212c3f">https://github.com/w3c/tidy-html5/tree/f212c3f</a></p>
<p>
<a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a>
<br />
@ -188,6 +188,13 @@
<td>Boolean</td>
<td>yes</td>
</tr>
<tr>
<td>
<a href="#gdoc">gdoc</a>
</td>
<td>Boolean</td>
<td>no</td>
</tr>
<tr>
<td>
<a href="#hide-comments">hide-comments</a>
@ -1134,6 +1141,25 @@
<tr>
<td>&#160;</td>
</tr>
<tr>
<td class="tabletitle" valign="top" id="gdoc">gdoc</td>
<td class="tabletitlelink" valign="top" align="right">
<a href="#top">Top</a>
</td>
</tr>
<tr>
<td valign="top">Type: <strong>Boolean</strong><br />
Default: <strong>no</strong><br />Example: <strong>y/n, yes/no, t/f, true/false, 1/0</strong></td>
<td align="right" valign="top">
<a href="#drop-font-tags">drop-font-tags</a>
</td>
</tr>
<tr>
<td colspan="2">This option specifies if Tidy should enable specific behavior for cleaning up HTML exported from Google Docs. </td>
</tr>
<tr>
<td>&#160;</td>
</tr>
<tr>
<td class="tabletitle" valign="top" id="hide-comments">hide-comments</td>
<td class="tabletitlelink" valign="top" align="right">

View file

@ -751,6 +751,27 @@ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
return attr;
}
/* Detach and free the first attribute of `node` whose name matches `name`.
**
** The attribute list is walked through a pointer to the link that leads to
** the current entry, so removing the head and removing an interior entry
** are the same operation. Entries without a name are skipped. Only the
** first match is removed; if nothing matches the list is left untouched.
*/
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name )
{
    AttVal **link = &node->attributes;

    while ( *link != NULL )
    {
        AttVal *cur = *link;

        if ( cur->attribute != NULL &&
             TY_(tmbstrcmp)(cur->attribute, name) == 0 )
        {
            /* Splice the entry out before releasing it. */
            *link = cur->next;
            TY_(FreeAttribute)( doc, cur );
            return;
        }

        link = &cur->next;
    }
}
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value )
{

View file

@ -87,6 +87,8 @@ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
/* Removes the first attribute of node whose name compares equal to name
** (via TY_(tmbstrcmp)) and releases it with TY_(FreeAttribute).
** Does nothing when node has no such attribute.
*/
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value );

View file

@ -252,6 +252,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
{ TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
{ TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
{ TidyGDocClean, MU, "gdoc", BL, no, ParseBool, boolPicks },
{ TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
{ TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
{ TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },

View file

@ -359,6 +359,8 @@ static const TidyOptionId TidyDropFontTagsLinks[] =
{ TidyMakeClean, TidyUnknownOption };
static const TidyOptionId TidyMakeCleanTagsLinks[] =
{ TidyDropFontTags, TidyUnknownOption };
/* Options related to "gdoc"; the list ends at TidyUnknownOption. */
static const TidyOptionId TidyGDocCleanLinks[] = { TidyMakeClean, TidyUnknownOption };
/* Documentation of options */
static const TidyOptionDoc option_docs[] =
@ -405,6 +407,12 @@ static const TidyOptionDoc option_docs[] =
"on the HTML saved by Microsoft Office products. "
, TidyMakeCleanTagsLinks
},
/* gdoc: adjacent string literals are concatenated with no separator, so
** every fragment carries its own trailing space. The cross-reference list
** is the one dedicated to this option (TidyGDocCleanLinks).
*/
{TidyGDocClean,
"This option specifies if Tidy "
"should enable specific behavior for cleaning up HTML exported from "
"Google Docs. "
, TidyGDocCleanLinks
},
{TidyDoctype,
"This option specifies the DOCTYPE declaration generated by Tidy.<br />"
"If set to \"omit\" the output won't contain a DOCTYPE declaration.<br />"

0
src/mappedio.c Executable file → Normal file
View file

0
src/mappedio.h Executable file → Normal file
View file

View file

@ -29,6 +29,7 @@
#include "tidy-int.h"
#include "parser.h"
#include "clean.h"
#include "gdoc.h"
#include "config.h"
#include "message.h"
#include "pprint.h"
@ -1238,6 +1239,7 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
Bool word2K = cfgBool( doc, TidyWord2000 );
Bool logical = cfgBool( doc, TidyLogicalEmphasis );
Bool clean = cfgBool( doc, TidyMakeClean );
Bool gdoc = cfgBool( doc, TidyGDocClean );
Bool dropFont = cfgBool( doc, TidyDropFontTags );
Bool htmlOut = cfgBool( doc, TidyHtmlOut );
Bool xmlOut = cfgBool( doc, TidyXmlOut );
@ -1278,6 +1280,10 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
if ( clean || dropFont )
TY_(CleanDocument)( doc );
/* clean up html exported by Google Docs */
if ( gdoc )
TY_(CleanGoogleDocument)( doc );
/* Move terminating <br /> tags from out of paragraphs */
/*! Do we want to do this for all block-level elements? */

View file

@ -1 +1 @@
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/8025154";
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/45fce5e";