diff --git a/.gitignore b/.gitignore
index e69f6db..83177cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,11 @@
/htmldoc/tidy.1
/htmldoc/quickref.html
/lib/
+/autom4te.cache/
+/console/.deps/
+/console/.libs/
+/src/.deps/
+/src/.libs/
*.user
*.suo
*.sdf
diff --git a/build/gmake/Makefile b/build/gmake/Makefile
index 8640dff..69e8fda 100644
--- a/build/gmake/Makefile
+++ b/build/gmake/Makefile
@@ -145,7 +145,7 @@ OBJFILES=\
$(OBJDIR)/attrask$(OBJSUF) $(OBJDIR)/attrdict$(OBJSUF) $(OBJDIR)/attrget$(OBJSUF) \
$(OBJDIR)/buffio$(OBJSUF) $(OBJDIR)/fileio$(OBJSUF) $(OBJDIR)/streamio$(OBJSUF) \
$(OBJDIR)/tagask$(OBJSUF) $(OBJDIR)/tmbstr$(OBJSUF) $(OBJDIR)/utf8$(OBJSUF) \
- $(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF)
+ $(OBJDIR)/tidylib$(OBJSUF) $(OBJDIR)/mappedio$(OBJSUF) $(OBJDIR)/gdoc$(OBJSUF)
CFILES= \
$(SRCDIR)/access.c $(SRCDIR)/attrs.c $(SRCDIR)/istack.c \
@@ -155,7 +155,7 @@ CFILES= \
$(SRCDIR)/attrask.c $(SRCDIR)/attrdict.c $(SRCDIR)/attrget.c \
$(SRCDIR)/buffio.c $(SRCDIR)/fileio.c $(SRCDIR)/streamio.c \
$(SRCDIR)/tagask.c $(SRCDIR)/tmbstr.c $(SRCDIR)/utf8.c \
- $(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c
+ $(SRCDIR)/tidylib.c $(SRCDIR)/mappedio.c $(SRCDIR)/gdoc.c
HFILES= $(INCDIR)/platform.h $(INCDIR)/tidy.h $(INCDIR)/tidyenum.h \
$(INCDIR)/buffio.h
@@ -167,7 +167,7 @@ LIBHFILES= \
$(SRCDIR)/mappedio.h $(SRCDIR)/message.h $(SRCDIR)/parser.h \
$(SRCDIR)/pprint.h $(SRCDIR)/streamio.h $(SRCDIR)/tags.h \
$(SRCDIR)/tmbstr.h $(SRCDIR)/utf8.h $(SRCDIR)/tidy-int.h \
- $(SRCDIR)/version.h
+ $(SRCDIR)/gdoc.h $(SRCDIR)/version.h
diff --git a/console/tidy.c b/console/tidy.c
index 76d6888..d821bde 100644
--- a/console/tidy.c
+++ b/console/tidy.c
@@ -181,6 +181,9 @@ static const CmdOptDesc cmdopt_defs[] = {
{ "-bare",
"strip out smart quotes and em dashes, etc.",
"bare: yes", CmdOptProcDir, "-b" },
+ { "-gdoc",
+ "produce clean version of html exported by google docs",
+ "gdoc: yes", CmdOptProcDir, "-g" },
{ "-numeric",
"output numeric rather than named entities",
"numeric-entities: yes", CmdOptProcDir, "-n" },
@@ -1010,6 +1013,9 @@ int main( int argc, char** argv )
else if ( strcasecmp(arg, "clean") == 0 )
tidyOptSetBool( tdoc, TidyMakeClean, yes );
+ else if ( strcasecmp(arg, "gdoc") == 0 )
+ tidyOptSetBool( tdoc, TidyGDocClean, yes );
+
else if ( strcasecmp(arg, "bare") == 0 )
tidyOptSetBool( tdoc, TidyMakeBare, yes );
@@ -1227,6 +1233,10 @@ int main( int argc, char** argv )
tidyOptSetBool( tdoc, TidyMakeClean, yes );
break;
+ case 'g':
+ tidyOptSetBool( tdoc, TidyGDocClean, yes );
+ break;
+
case 'b':
tidyOptSetBool( tdoc, TidyMakeBare, yes );
break;
diff --git a/include/tidyenum.h b/include/tidyenum.h
index f253942..d4174e7 100644
--- a/include/tidyenum.h
+++ b/include/tidyenum.h
@@ -126,6 +126,7 @@ typedef enum
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
TidyMakeClean, /**< Replace presentational clutter by style rules */
+ TidyGDocClean, /**< Clean up HTML exported from Google Docs */
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
TidyDropPropAttrs, /**< Discard proprietary attributes */
TidyDropFontTags, /**< Discard presentation tags */
diff --git a/quickref.html b/quickref.html
index a09d0a5..b4f7e3b 100644
--- a/quickref.html
+++ b/quickref.html
@@ -8,7 +8,7 @@
Quick Reference
HTML Tidy Configuration Options
- Version: https://github.com/w3c/tidy-html5/tree/d193420
+ Version: https://github.com/w3c/tidy-html5/tree/f212c3f
HTML, XHTML, XML
@@ -188,6 +188,13 @@
Boolean |
yes |
+
+
+ gdoc
+ |
+ Boolean |
+ no |
+
hide-comments
@@ -1134,6 +1141,25 @@
|
|
+
+ gdoc |
+
+ Top
+ |
+
+
+ Type: Boolean
+ Default: no Example: y/n, yes/no, t/f, true/false, 1/0 |
+
+ drop-font-tags
+ |
+
+
+ This option specifies if Tidy should enable specific behavior for cleaning up HTML exported from Google Docs. |
+
+
+ |
+
diff --git a/src/attrs.c b/src/attrs.c
index 44abfc7..7472468 100644
--- a/src/attrs.c
+++ b/src/attrs.c
@@ -751,6 +751,27 @@ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
return attr;
}
+void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name )
+{
+ AttVal *attr, *prev = NULL, *next;
+ /* Unlink from node->attributes the first attribute whose name compares equal to `name` and pass it to TY_(FreeAttribute); does nothing when no attribute matches. */
+ for (attr = node->attributes; attr != NULL; prev = attr, attr = next)
+ {
+ next = attr->next;
+ /* Attributes with a NULL name are skipped; a TY_(tmbstrcmp) result of 0 is treated as a match. */
+ if (attr->attribute && TY_(tmbstrcmp)(attr->attribute, name) == 0)
+ {
+ if (prev)
+ prev->next = next;
+ else
+ node->attributes = next;
+ /* attr is already unlinked from the list here; only the first match is dropped (see break). */
+ TY_(FreeAttribute)( doc, attr );
+ break;
+ }
+ }
+}
+
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value )
{
diff --git a/src/attrs.h b/src/attrs.h
index 7b06ab4..fb9a6af 100644
--- a/src/attrs.h
+++ b/src/attrs.h
@@ -87,6 +87,8 @@ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
+void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
+
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value );
diff --git a/src/config.c b/src/config.c
index f557e71..7a952cf 100644
--- a/src/config.c
+++ b/src/config.c
@@ -252,6 +252,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
{ TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
{ TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
+ { TidyGDocClean, MU, "gdoc", BL, no, ParseBool, boolPicks },
{ TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
{ TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
{ TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },
diff --git a/src/localize.c b/src/localize.c
index 5cd2063..a5c1d65 100644
--- a/src/localize.c
+++ b/src/localize.c
@@ -359,6 +359,8 @@ static const TidyOptionId TidyDropFontTagsLinks[] =
{ TidyMakeClean, TidyUnknownOption };
static const TidyOptionId TidyMakeCleanTagsLinks[] =
{ TidyDropFontTags, TidyUnknownOption };
+static const TidyOptionId TidyGDocCleanLinks[] =
+ { TidyMakeClean, TidyUnknownOption };
/* Documentation of options */
static const TidyOptionDoc option_docs[] =
@@ -405,6 +407,12 @@ static const TidyOptionDoc option_docs[] =
"on the HTML saved by Microsoft Office products. "
, TidyMakeCleanTagsLinks
},
+ {TidyGDocClean,
+ "This option specifies if Tidy "
+ "should enable specific behavior for cleaning up HTML exported from "
+ "Google Docs. "
+ , TidyGDocCleanLinks /* see-also list declared for this option: { TidyMakeClean } */
+ },
{TidyDoctype,
"This option specifies the DOCTYPE declaration generated by Tidy. "
"If set to \"omit\" the output won't contain a DOCTYPE declaration. "
diff --git a/src/mappedio.c b/src/mappedio.c
old mode 100755
new mode 100644
diff --git a/src/mappedio.h b/src/mappedio.h
old mode 100755
new mode 100644
diff --git a/src/tidylib.c b/src/tidylib.c
index 0ff8cd6..5756d19 100644
--- a/src/tidylib.c
+++ b/src/tidylib.c
@@ -29,6 +29,7 @@
#include "tidy-int.h"
#include "parser.h"
#include "clean.h"
+#include "gdoc.h"
#include "config.h"
#include "message.h"
#include "pprint.h"
@@ -1238,6 +1239,7 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
Bool word2K = cfgBool( doc, TidyWord2000 );
Bool logical = cfgBool( doc, TidyLogicalEmphasis );
Bool clean = cfgBool( doc, TidyMakeClean );
+ Bool gdoc = cfgBool( doc, TidyGDocClean );
Bool dropFont = cfgBool( doc, TidyDropFontTags );
Bool htmlOut = cfgBool( doc, TidyHtmlOut );
Bool xmlOut = cfgBool( doc, TidyXmlOut );
@@ -1278,6 +1280,10 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
if ( clean || dropFont )
TY_(CleanDocument)( doc );
+ /* clean up html exported by Google Docs */
+ if ( gdoc )
+ TY_(CleanGoogleDocument)( doc );
+
/* Move terminating tags from out of paragraphs */
/*! Do we want to do this for all block-level elements? */
diff --git a/src/version.h b/src/version.h
index 88bc5ff..dab783c 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/8025154";
\ No newline at end of file
+static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/45fce5e";
\ No newline at end of file
|