Let's actually commit the -gdoc feature this time.
This commit is contained in:
parent
45fce5e3c2
commit
a772bbb17f
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -12,6 +12,11 @@
|
|||
/htmldoc/tidy.1
|
||||
/htmldoc/quickref.html
|
||||
/lib/
|
||||
/autom4te.cache/
|
||||
/console/.deps/
|
||||
/console/.libs/
|
||||
/src/.deps/
|
||||
/src/.libs/
|
||||
*.user
|
||||
*.suo
|
||||
*.sdf
|
||||
|
|
|
@ -145,7 +145,7 @@ OBJFILES=\
|
|||
$(OBJDIR)/attrask$(OBJSUF) $(OBJDIR)/attrdict$(OBJSUF) $(OBJDIR)/attrget$(OBJSUF) \
|
||||
$(OBJDIR)/buffio$(OBJSUF) $(OBJDIR)/fileio$(OBJSUF) $(OBJDIR)/streamio$(OBJSUF) \
|
||||
$(OBJDIR)/tagask$(OBJSUF) $(OBJDIR)/tmbstr$(OBJSUF) $(OBJDIR)/utf8$(OBJSUF) \
|
||||
$(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF)
|
||||
$(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF) $(OBJDIR)/gdoc$(OBJSUF)
|
||||
|
||||
CFILES= \
|
||||
$(SRCDIR)/access.c $(SRCDIR)/attrs.c $(SRCDIR)/istack.c \
|
||||
|
@ -155,7 +155,7 @@ CFILES= \
|
|||
$(SRCDIR)/attrask.c $(SRCDIR)/attrdict.c $(SRCDIR)/attrget.c \
|
||||
$(SRCDIR)/buffio.c $(SRCDIR)/fileio.c $(SRCDIR)/streamio.c \
|
||||
$(SRCDIR)/tagask.c $(SRCDIR)/tmbstr.c $(SRCDIR)/utf8.c \
|
||||
$(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c
|
||||
$(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c $(SRCDIR)/gdoc.c
|
||||
|
||||
HFILES= $(INCDIR)/platform.h $(INCDIR)/tidy.h $(INCDIR)/tidyenum.h \
|
||||
$(INCDIR)/buffio.h
|
||||
|
@ -167,7 +167,7 @@ LIBHFILES= \
|
|||
$(SRCDIR)/mappedio.h $(SRCDIR)/message.h $(SRCDIR)/parser.h \
|
||||
$(SRCDIR)/pprint.h $(SRCDIR)/streamio.h $(SRCDIR)/tags.h \
|
||||
$(SRCDIR)/tmbstr.h $(SRCDIR)/utf8.h $(SRCDIR)/tidy-int.h \
|
||||
$(SRCDIR)/version.h
|
||||
$(SRCDIR)/gdoc.h $(SRCDIR)/version.h
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -181,6 +181,9 @@ static const CmdOptDesc cmdopt_defs[] = {
|
|||
{ "-bare",
|
||||
"strip out smart quotes and em dashes, etc.",
|
||||
"bare: yes", CmdOptProcDir, "-b" },
|
||||
{ "-gdoc",
|
||||
"produce clean version of html exported by google docs",
|
||||
"gdoc: yes", CmdOptProcDir, "-g" },
|
||||
{ "-numeric",
|
||||
"output numeric rather than named entities",
|
||||
"numeric-entities: yes", CmdOptProcDir, "-n" },
|
||||
|
@ -1010,6 +1013,9 @@ int main( int argc, char** argv )
|
|||
else if ( strcasecmp(arg, "clean") == 0 )
|
||||
tidyOptSetBool( tdoc, TidyMakeClean, yes );
|
||||
|
||||
else if ( strcasecmp(arg, "gdoc") == 0 )
|
||||
tidyOptSetBool( tdoc, TidyGDocClean, yes );
|
||||
|
||||
else if ( strcasecmp(arg, "bare") == 0 )
|
||||
tidyOptSetBool( tdoc, TidyMakeBare, yes );
|
||||
|
||||
|
@ -1227,6 +1233,10 @@ int main( int argc, char** argv )
|
|||
tidyOptSetBool( tdoc, TidyMakeClean, yes );
|
||||
break;
|
||||
|
||||
case 'g':
|
||||
tidyOptSetBool( tdoc, TidyGDocClean, yes );
|
||||
break;
|
||||
|
||||
case 'b':
|
||||
tidyOptSetBool( tdoc, TidyMakeBare, yes );
|
||||
break;
|
||||
|
|
|
@ -126,6 +126,7 @@ typedef enum
|
|||
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
|
||||
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
|
||||
TidyMakeClean, /**< Replace presentational clutter by style rules */
|
||||
TidyGDocClean, /**< Clean up HTML exported from Google Docs */
|
||||
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
|
||||
TidyDropPropAttrs, /**< Discard proprietary attributes */
|
||||
TidyDropFontTags, /**< Discard presentation tags */
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
<body>
|
||||
<h1 id="top">Quick Reference</h1>
|
||||
<h2>HTML Tidy Configuration Options</h2>
|
||||
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/d193420">https://github.com/w3c/tidy-html5/tree/d193420</a></p>
|
||||
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/f212c3f">https://github.com/w3c/tidy-html5/tree/f212c3f</a></p>
|
||||
<p>
|
||||
<a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a>
|
||||
<br />
|
||||
|
@ -188,6 +188,13 @@
|
|||
<td>Boolean</td>
|
||||
<td>yes</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<a href="#gdoc">gdoc</a>
|
||||
</td>
|
||||
<td>Boolean</td>
|
||||
<td>no</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<a href="#hide-comments">hide-comments</a>
|
||||
|
@ -1134,6 +1141,25 @@
|
|||
<tr>
|
||||
<td> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tabletitle" valign="top" id="gdoc">gdoc</td>
|
||||
<td class="tabletitlelink" valign="top" align="right">
|
||||
<a href="#top">Top</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top">Type: <strong>Boolean</strong><br />
|
||||
Default: <strong>no</strong><br />Example: <strong>y/n, yes/no, t/f, true/false, 1/0</strong></td>
|
||||
<td align="right" valign="top">
|
||||
<a href="#drop-font-tags">drop-font-tags</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2">This option specifies if Tidy should enable specific behavior for cleaning up HTML exported from Google Docs. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tabletitle" valign="top" id="hide-comments">hide-comments</td>
|
||||
<td class="tabletitlelink" valign="top" align="right">
|
||||
|
|
21
src/attrs.c
21
src/attrs.c
|
@ -751,6 +751,27 @@ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
|
|||
return attr;
|
||||
}
|
||||
|
||||
/* Remove a single attribute from node by name.
**
** Walks node->attributes in order and compares each attribute's
** name against `name` with TY_(tmbstrcmp). Entries that have no
** name are skipped. The first match is unlinked from the list and
** released with TY_(FreeAttribute); any later attributes are left
** untouched. If nothing matches, the list is left unchanged.
*/
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name )
{
    /* Address of the pointer that currently leads to the attribute
    ** under inspection: first the list head, then each `next` field.
    */
    AttVal **link = &node->attributes;

    while ( *link != NULL )
    {
        AttVal *cur = *link;

        if ( cur->attribute != NULL &&
             TY_(tmbstrcmp)(cur->attribute, name) == 0 )
        {
            /* Splice the match out before freeing it. */
            *link = cur->next;
            TY_(FreeAttribute)( doc, cur );
            return;
        }

        link = &cur->next;
    }
}
|
||||
|
||||
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
||||
Node *node, ctmbstr name, ctmbstr value )
|
||||
{
|
||||
|
|
|
@ -87,6 +87,8 @@ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
|
|||
|
||||
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
|
||||
|
||||
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
|
||||
|
||||
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
||||
Node *node, ctmbstr name, ctmbstr value );
|
||||
|
||||
|
|
|
@ -252,6 +252,7 @@ static const TidyOptionImpl option_defs[] =
|
|||
{ TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
|
||||
{ TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
|
||||
{ TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
|
||||
{ TidyGDocClean, MU, "gdoc", BL, no, ParseBool, boolPicks },
|
||||
{ TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
|
||||
{ TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
|
||||
{ TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },
|
||||
|
|
|
@ -359,6 +359,8 @@ static const TidyOptionId TidyDropFontTagsLinks[] =
|
|||
{ TidyMakeClean, TidyUnknownOption };
|
||||
static const TidyOptionId TidyMakeCleanTagsLinks[] =
|
||||
{ TidyDropFontTags, TidyUnknownOption };
|
||||
static const TidyOptionId TidyGDocCleanLinks[] =
|
||||
{ TidyMakeClean, TidyUnknownOption };
|
||||
|
||||
/* Documentation of options */
|
||||
static const TidyOptionDoc option_docs[] =
|
||||
|
@ -405,6 +407,12 @@ static const TidyOptionDoc option_docs[] =
|
|||
"on the HTML saved by Microsoft Office products. "
|
||||
, TidyMakeCleanTagsLinks
|
||||
},
|
||||
{TidyGDocClean,
|
||||
"This option specifies if Tidy "
|
||||
"should enable specific behavior for cleaning up HTML exported from "
|
||||
"Google Docs. "
|
||||
, TidyMakeCleanTagsLinks
|
||||
},
|
||||
{TidyDoctype,
|
||||
"This option specifies the DOCTYPE declaration generated by Tidy.<br />"
|
||||
"If set to \"omit\" the output won't contain a DOCTYPE declaration.<br />"
|
||||
|
|
0
src/mappedio.c
Executable file → Normal file
0
src/mappedio.c
Executable file → Normal file
0
src/mappedio.h
Executable file → Normal file
0
src/mappedio.h
Executable file → Normal file
|
@ -29,6 +29,7 @@
|
|||
#include "tidy-int.h"
|
||||
#include "parser.h"
|
||||
#include "clean.h"
|
||||
#include "gdoc.h"
|
||||
#include "config.h"
|
||||
#include "message.h"
|
||||
#include "pprint.h"
|
||||
|
@ -1238,6 +1239,7 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
|||
Bool word2K = cfgBool( doc, TidyWord2000 );
|
||||
Bool logical = cfgBool( doc, TidyLogicalEmphasis );
|
||||
Bool clean = cfgBool( doc, TidyMakeClean );
|
||||
Bool gdoc = cfgBool( doc, TidyGDocClean );
|
||||
Bool dropFont = cfgBool( doc, TidyDropFontTags );
|
||||
Bool htmlOut = cfgBool( doc, TidyHtmlOut );
|
||||
Bool xmlOut = cfgBool( doc, TidyXmlOut );
|
||||
|
@ -1278,6 +1280,10 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
|||
if ( clean || dropFont )
|
||||
TY_(CleanDocument)( doc );
|
||||
|
||||
/* clean up html exported by Google Docs */
|
||||
if ( gdoc )
|
||||
TY_(CleanGoogleDocument)( doc );
|
||||
|
||||
/* Move terminating <br /> tags from out of paragraphs */
|
||||
/*! Do we want to do this for all block-level elements? */
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/8025154";
|
||||
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/45fce5e";
|
Loading…
Reference in a new issue