Let's actually commit the -gdoc feature this time.
This commit is contained in:
parent
45fce5e3c2
commit
a772bbb17f
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -12,6 +12,11 @@
|
||||||
/htmldoc/tidy.1
|
/htmldoc/tidy.1
|
||||||
/htmldoc/quickref.html
|
/htmldoc/quickref.html
|
||||||
/lib/
|
/lib/
|
||||||
|
/autom4te.cache/
|
||||||
|
/console/.deps/
|
||||||
|
/console/.libs/
|
||||||
|
/src/.deps/
|
||||||
|
/src/.libs/
|
||||||
*.user
|
*.user
|
||||||
*.suo
|
*.suo
|
||||||
*.sdf
|
*.sdf
|
||||||
|
|
|
@ -145,7 +145,7 @@ OBJFILES=\
|
||||||
$(OBJDIR)/attrask$(OBJSUF) $(OBJDIR)/attrdict$(OBJSUF) $(OBJDIR)/attrget$(OBJSUF) \
|
$(OBJDIR)/attrask$(OBJSUF) $(OBJDIR)/attrdict$(OBJSUF) $(OBJDIR)/attrget$(OBJSUF) \
|
||||||
$(OBJDIR)/buffio$(OBJSUF) $(OBJDIR)/fileio$(OBJSUF) $(OBJDIR)/streamio$(OBJSUF) \
|
$(OBJDIR)/buffio$(OBJSUF) $(OBJDIR)/fileio$(OBJSUF) $(OBJDIR)/streamio$(OBJSUF) \
|
||||||
$(OBJDIR)/tagask$(OBJSUF) $(OBJDIR)/tmbstr$(OBJSUF) $(OBJDIR)/utf8$(OBJSUF) \
|
$(OBJDIR)/tagask$(OBJSUF) $(OBJDIR)/tmbstr$(OBJSUF) $(OBJDIR)/utf8$(OBJSUF) \
|
||||||
$(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF)
|
$(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF) $(OBJDIR)/gdoc$(OBJSUF)
|
||||||
|
|
||||||
CFILES= \
|
CFILES= \
|
||||||
$(SRCDIR)/access.c $(SRCDIR)/attrs.c $(SRCDIR)/istack.c \
|
$(SRCDIR)/access.c $(SRCDIR)/attrs.c $(SRCDIR)/istack.c \
|
||||||
|
@ -155,7 +155,7 @@ CFILES= \
|
||||||
$(SRCDIR)/attrask.c $(SRCDIR)/attrdict.c $(SRCDIR)/attrget.c \
|
$(SRCDIR)/attrask.c $(SRCDIR)/attrdict.c $(SRCDIR)/attrget.c \
|
||||||
$(SRCDIR)/buffio.c $(SRCDIR)/fileio.c $(SRCDIR)/streamio.c \
|
$(SRCDIR)/buffio.c $(SRCDIR)/fileio.c $(SRCDIR)/streamio.c \
|
||||||
$(SRCDIR)/tagask.c $(SRCDIR)/tmbstr.c $(SRCDIR)/utf8.c \
|
$(SRCDIR)/tagask.c $(SRCDIR)/tmbstr.c $(SRCDIR)/utf8.c \
|
||||||
$(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c
|
$(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c $(SRCDIR)/gdoc.c
|
||||||
|
|
||||||
HFILES= $(INCDIR)/platform.h $(INCDIR)/tidy.h $(INCDIR)/tidyenum.h \
|
HFILES= $(INCDIR)/platform.h $(INCDIR)/tidy.h $(INCDIR)/tidyenum.h \
|
||||||
$(INCDIR)/buffio.h
|
$(INCDIR)/buffio.h
|
||||||
|
@ -167,7 +167,7 @@ LIBHFILES= \
|
||||||
$(SRCDIR)/mappedio.h $(SRCDIR)/message.h $(SRCDIR)/parser.h \
|
$(SRCDIR)/mappedio.h $(SRCDIR)/message.h $(SRCDIR)/parser.h \
|
||||||
$(SRCDIR)/pprint.h $(SRCDIR)/streamio.h $(SRCDIR)/tags.h \
|
$(SRCDIR)/pprint.h $(SRCDIR)/streamio.h $(SRCDIR)/tags.h \
|
||||||
$(SRCDIR)/tmbstr.h $(SRCDIR)/utf8.h $(SRCDIR)/tidy-int.h \
|
$(SRCDIR)/tmbstr.h $(SRCDIR)/utf8.h $(SRCDIR)/tidy-int.h \
|
||||||
$(SRCDIR)/version.h
|
$(SRCDIR)/gdoc.h $(SRCDIR)/version.h
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -181,6 +181,9 @@ static const CmdOptDesc cmdopt_defs[] = {
|
||||||
{ "-bare",
|
{ "-bare",
|
||||||
"strip out smart quotes and em dashes, etc.",
|
"strip out smart quotes and em dashes, etc.",
|
||||||
"bare: yes", CmdOptProcDir, "-b" },
|
"bare: yes", CmdOptProcDir, "-b" },
|
||||||
|
{ "-gdoc",
|
||||||
|
"produce clean version of html exported by google docs",
|
||||||
|
"gdoc: yes", CmdOptProcDir, "-g" },
|
||||||
{ "-numeric",
|
{ "-numeric",
|
||||||
"output numeric rather than named entities",
|
"output numeric rather than named entities",
|
||||||
"numeric-entities: yes", CmdOptProcDir, "-n" },
|
"numeric-entities: yes", CmdOptProcDir, "-n" },
|
||||||
|
@ -1010,6 +1013,9 @@ int main( int argc, char** argv )
|
||||||
else if ( strcasecmp(arg, "clean") == 0 )
|
else if ( strcasecmp(arg, "clean") == 0 )
|
||||||
tidyOptSetBool( tdoc, TidyMakeClean, yes );
|
tidyOptSetBool( tdoc, TidyMakeClean, yes );
|
||||||
|
|
||||||
|
else if ( strcasecmp(arg, "gdoc") == 0 )
|
||||||
|
tidyOptSetBool( tdoc, TidyGDocClean, yes );
|
||||||
|
|
||||||
else if ( strcasecmp(arg, "bare") == 0 )
|
else if ( strcasecmp(arg, "bare") == 0 )
|
||||||
tidyOptSetBool( tdoc, TidyMakeBare, yes );
|
tidyOptSetBool( tdoc, TidyMakeBare, yes );
|
||||||
|
|
||||||
|
@ -1227,6 +1233,10 @@ int main( int argc, char** argv )
|
||||||
tidyOptSetBool( tdoc, TidyMakeClean, yes );
|
tidyOptSetBool( tdoc, TidyMakeClean, yes );
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 'g':
|
||||||
|
tidyOptSetBool( tdoc, TidyGDocClean, yes );
|
||||||
|
break;
|
||||||
|
|
||||||
case 'b':
|
case 'b':
|
||||||
tidyOptSetBool( tdoc, TidyMakeBare, yes );
|
tidyOptSetBool( tdoc, TidyMakeBare, yes );
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -126,6 +126,7 @@ typedef enum
|
||||||
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
|
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
|
||||||
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
|
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
|
||||||
TidyMakeClean, /**< Replace presentational clutter by style rules */
|
TidyMakeClean, /**< Replace presentational clutter by style rules */
|
||||||
|
TidyGDocClean, /**< Clean up HTML exported from Google Docs */
|
||||||
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
|
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
|
||||||
TidyDropPropAttrs, /**< Discard proprietary attributes */
|
TidyDropPropAttrs, /**< Discard proprietary attributes */
|
||||||
TidyDropFontTags, /**< Discard presentation tags */
|
TidyDropFontTags, /**< Discard presentation tags */
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
<body>
|
<body>
|
||||||
<h1 id="top">Quick Reference</h1>
|
<h1 id="top">Quick Reference</h1>
|
||||||
<h2>HTML Tidy Configuration Options</h2>
|
<h2>HTML Tidy Configuration Options</h2>
|
||||||
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/d193420">https://github.com/w3c/tidy-html5/tree/d193420</a></p>
|
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/f212c3f">https://github.com/w3c/tidy-html5/tree/f212c3f</a></p>
|
||||||
<p>
|
<p>
|
||||||
<a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a>
|
<a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a>
|
||||||
<br />
|
<br />
|
||||||
|
@ -188,6 +188,13 @@
|
||||||
<td>Boolean</td>
|
<td>Boolean</td>
|
||||||
<td>yes</td>
|
<td>yes</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<a href="#gdoc">gdoc</a>
|
||||||
|
</td>
|
||||||
|
<td>Boolean</td>
|
||||||
|
<td>no</td>
|
||||||
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<a href="#hide-comments">hide-comments</a>
|
<a href="#hide-comments">hide-comments</a>
|
||||||
|
@ -1134,6 +1141,25 @@
|
||||||
<tr>
|
<tr>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td class="tabletitle" valign="top" id="gdoc">gdoc</td>
|
||||||
|
<td class="tabletitlelink" valign="top" align="right">
|
||||||
|
<a href="#top">Top</a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td valign="top">Type: <strong>Boolean</strong><br />
|
||||||
|
Default: <strong>no</strong><br />Example: <strong>y/n, yes/no, t/f, true/false, 1/0</strong></td>
|
||||||
|
<td align="right" valign="top">
|
||||||
|
<a href="#drop-font-tags">drop-font-tags</a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td colspan="2">This option specifies if Tidy should enable specific behavior for cleaning up HTML exported from Google Docs. </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td> </td>
|
||||||
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td class="tabletitle" valign="top" id="hide-comments">hide-comments</td>
|
<td class="tabletitle" valign="top" id="hide-comments">hide-comments</td>
|
||||||
<td class="tabletitlelink" valign="top" align="right">
|
<td class="tabletitlelink" valign="top" align="right">
|
||||||
|
|
21
src/attrs.c
21
src/attrs.c
|
@ -751,6 +751,27 @@ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
|
||||||
return attr;
|
return attr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name )
|
||||||
|
{
|
||||||
|
AttVal *attr, *prev = NULL, *next;
|
||||||
|
|
||||||
|
for (attr = node->attributes; attr != NULL; prev = attr, attr = next)
|
||||||
|
{
|
||||||
|
next = attr->next;
|
||||||
|
|
||||||
|
if (attr->attribute && TY_(tmbstrcmp)(attr->attribute, name) == 0)
|
||||||
|
{
|
||||||
|
if (prev)
|
||||||
|
prev->next = next;
|
||||||
|
else
|
||||||
|
node->attributes = next;
|
||||||
|
|
||||||
|
TY_(FreeAttribute)( doc, attr );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
||||||
Node *node, ctmbstr name, ctmbstr value )
|
Node *node, ctmbstr name, ctmbstr value )
|
||||||
{
|
{
|
||||||
|
|
|
@ -87,6 +87,8 @@ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
|
||||||
|
|
||||||
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
|
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
|
||||||
|
|
||||||
|
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
|
||||||
|
|
||||||
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
||||||
Node *node, ctmbstr name, ctmbstr value );
|
Node *node, ctmbstr name, ctmbstr value );
|
||||||
|
|
||||||
|
|
|
@ -252,6 +252,7 @@ static const TidyOptionImpl option_defs[] =
|
||||||
{ TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
|
{ TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
|
||||||
{ TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
|
{ TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
|
||||||
{ TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
|
{ TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
|
||||||
|
{ TidyGDocClean, MU, "gdoc", BL, no, ParseBool, boolPicks },
|
||||||
{ TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
|
{ TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
|
||||||
{ TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
|
{ TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
|
||||||
{ TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },
|
{ TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },
|
||||||
|
|
|
@ -359,6 +359,8 @@ static const TidyOptionId TidyDropFontTagsLinks[] =
|
||||||
{ TidyMakeClean, TidyUnknownOption };
|
{ TidyMakeClean, TidyUnknownOption };
|
||||||
static const TidyOptionId TidyMakeCleanTagsLinks[] =
|
static const TidyOptionId TidyMakeCleanTagsLinks[] =
|
||||||
{ TidyDropFontTags, TidyUnknownOption };
|
{ TidyDropFontTags, TidyUnknownOption };
|
||||||
|
static const TidyOptionId TidyGDocCleanLinks[] =
|
||||||
|
{ TidyMakeClean, TidyUnknownOption };
|
||||||
|
|
||||||
/* Documentation of options */
|
/* Documentation of options */
|
||||||
static const TidyOptionDoc option_docs[] =
|
static const TidyOptionDoc option_docs[] =
|
||||||
|
@ -405,6 +407,12 @@ static const TidyOptionDoc option_docs[] =
|
||||||
"on the HTML saved by Microsoft Office products. "
|
"on the HTML saved by Microsoft Office products. "
|
||||||
, TidyMakeCleanTagsLinks
|
, TidyMakeCleanTagsLinks
|
||||||
},
|
},
|
||||||
|
{TidyGDocClean,
|
||||||
|
"This option specifies if Tidy "
|
||||||
|
"should enable specific behavior for cleaning up HTML exported from "
|
||||||
|
"Google Docs. "
|
||||||
|
, TidyMakeCleanTagsLinks
|
||||||
|
},
|
||||||
{TidyDoctype,
|
{TidyDoctype,
|
||||||
"This option specifies the DOCTYPE declaration generated by Tidy.<br />"
|
"This option specifies the DOCTYPE declaration generated by Tidy.<br />"
|
||||||
"If set to \"omit\" the output won't contain a DOCTYPE declaration.<br />"
|
"If set to \"omit\" the output won't contain a DOCTYPE declaration.<br />"
|
||||||
|
|
0
src/mappedio.c
Executable file → Normal file
0
src/mappedio.c
Executable file → Normal file
0
src/mappedio.h
Executable file → Normal file
0
src/mappedio.h
Executable file → Normal file
|
@ -29,6 +29,7 @@
|
||||||
#include "tidy-int.h"
|
#include "tidy-int.h"
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
#include "clean.h"
|
#include "clean.h"
|
||||||
|
#include "gdoc.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "message.h"
|
#include "message.h"
|
||||||
#include "pprint.h"
|
#include "pprint.h"
|
||||||
|
@ -1238,6 +1239,7 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
||||||
Bool word2K = cfgBool( doc, TidyWord2000 );
|
Bool word2K = cfgBool( doc, TidyWord2000 );
|
||||||
Bool logical = cfgBool( doc, TidyLogicalEmphasis );
|
Bool logical = cfgBool( doc, TidyLogicalEmphasis );
|
||||||
Bool clean = cfgBool( doc, TidyMakeClean );
|
Bool clean = cfgBool( doc, TidyMakeClean );
|
||||||
|
Bool gdoc = cfgBool( doc, TidyGDocClean );
|
||||||
Bool dropFont = cfgBool( doc, TidyDropFontTags );
|
Bool dropFont = cfgBool( doc, TidyDropFontTags );
|
||||||
Bool htmlOut = cfgBool( doc, TidyHtmlOut );
|
Bool htmlOut = cfgBool( doc, TidyHtmlOut );
|
||||||
Bool xmlOut = cfgBool( doc, TidyXmlOut );
|
Bool xmlOut = cfgBool( doc, TidyXmlOut );
|
||||||
|
@ -1278,6 +1280,10 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
||||||
if ( clean || dropFont )
|
if ( clean || dropFont )
|
||||||
TY_(CleanDocument)( doc );
|
TY_(CleanDocument)( doc );
|
||||||
|
|
||||||
|
/* clean up html exported by Google Docs */
|
||||||
|
if ( gdoc )
|
||||||
|
TY_(CleanGoogleDocument)( doc );
|
||||||
|
|
||||||
/* Move terminating <br /> tags from out of paragraphs */
|
/* Move terminating <br /> tags from out of paragraphs */
|
||||||
/*! Do we want to do this for all block-level elements? */
|
/*! Do we want to do this for all block-level elements? */
|
||||||
|
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/8025154";
|
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/45fce5e";
|
Loading…
Reference in a new issue