From bb1d62d3bdebcf95a1950d3bb717607012a15f83 Mon Sep 17 00:00:00 2001 From: Evgeniy Yurtaev Date: Wed, 22 Feb 2017 14:09:11 +0300 Subject: [PATCH 01/14] Fix leading white spaces trimming --- src/lexer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.c b/src/lexer.c index 210c9cf..e625209 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -4096,7 +4096,7 @@ static tmbstr ParseValue( TidyDocImpl* doc, ctmbstr name, while (TY_(IsWhite)(lexer->lexbuf[start+len-1]) && (len > 0)) --len; - while (TY_(IsWhite)(lexer->lexbuf[start]) && (start < len) && (len > 0)) + while (TY_(IsWhite)(lexer->lexbuf[start]) && (len > 0)) { ++start; --len; From 569ae4b4350ecbe94e6bcee3d9ed28eff5d3c69b Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Thu, 23 Feb 2017 15:27:03 +0100 Subject: [PATCH 02/14] Issue #329 - lexer.c - do not discard this newline here --- src/lexer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lexer.c b/src/lexer.c index 210c9cf..fb79e50 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -2986,7 +2986,9 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) { c = TY_(ReadChar)(doc->docIn); - if (c != '\n' && c != '\f') + if ((c == '\n') && (mode != IgnoreWhitespace)) /* Issue #329 - Can NOT afford to lose this newline */ + TY_(UngetChar)(c, doc->docIn); /* Issue #329 - make sure the newline is maintained for now */ + else if (c != '\n' && c != '\f') TY_(UngetChar)(c, doc->docIn); lexer->waswhite = yes; /* to swallow leading whitespace */ From b97b2f0d453a9f017d8dd3183f1d57711f212574 Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Thu, 23 Feb 2017 15:28:40 +0100 Subject: [PATCH 03/14] Issue #329 - version.txt - bump to 5.3.18 for this fix --- version.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version.txt b/version.txt index e20f7e1..5892180 100644 --- a/version.txt +++ b/version.txt @@ -1,2 +1,2 @@ -5.3.17 -2017.02.12 +5.3.18 +2017.02.23 From 
27fe0548b9dfe18e6d6f263df9202558acf4c389 Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Thu, 23 Feb 2017 16:28:19 +0100 Subject: [PATCH 04/14] Issue #468 - config.c - use `RAW` encoding for all cases --- src/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index ddb677c..e4a3ea0 100644 --- a/src/config.c +++ b/src/config.c @@ -934,7 +934,7 @@ Bool TY_(ParseConfigValue)( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optval if (optId == TidyOutFile) doc->config.cfgIn = TY_(BufferInput)( doc, &inbuf, RAW ); else - doc->config.cfgIn = TY_(BufferInput)( doc, &inbuf, ASCII ); + doc->config.cfgIn = TY_(BufferInput)( doc, &inbuf, RAW ); /* Issue #468 - Was ASCII! */ doc->config.c = GetC( &doc->config ); status = option->parser( doc, option ); From 13c92bce3855209672b26e86ac9d79db3aea8661 Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Thu, 23 Feb 2017 16:29:44 +0100 Subject: [PATCH 05/14] Issue #468 - version.txt - Bump to 5.3.19 for this fix --- version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.txt b/version.txt index 5892180..2742ca7 100644 --- a/version.txt +++ b/version.txt @@ -1,2 +1,2 @@ -5.3.18 +5.3.19 2017.02.23 From c4b5904e1c194a01db52be2a4531207a246c12f1 Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Fri, 24 Feb 2017 14:38:20 +0100 Subject: [PATCH 06/14] Issue #497 - lexer.c - Add comment for this PR @seaburg --- src/lexer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lexer.c b/src/lexer.c index aee3e02..efefe74 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -4098,6 +4098,7 @@ static tmbstr ParseValue( TidyDocImpl* doc, ctmbstr name, while (TY_(IsWhite)(lexer->lexbuf[start+len-1]) && (len > 0)) --len; + /* Issue #497 - Fix leading space trimming */ while (TY_(IsWhite)(lexer->lexbuf[start]) && (len > 0)) { ++start; From d07134140a404d10dbf8819544424a33818b7bac Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Fri, 24 Feb 2017 14:39:46 +0100 Subject: [PATCH 
07/14] Issue #497 - version.txt - Bump to 5.3.20 for this fix --- version.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version.txt b/version.txt index 2742ca7..76aab01 100644 --- a/version.txt +++ b/version.txt @@ -1,2 +1,2 @@ -5.3.19 -2017.02.23 +5.3.20 +2017.02.24 From a49890ee55025cb4a3097ad63a2fcd79459f2156 Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Fri, 24 Feb 2017 16:19:58 +0100 Subject: [PATCH 08/14] Issue #498 - parser.c - if a <table> in a <table>
just close. The previous action was to discard the second, while it is the second table that browsers will render. This conforms to the principle that the html output by tidy should render in a browser like the original html. --- src/parser.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index 30a9911..6ff2388 100644 --- a/src/parser.c +++ b/src/parser.c @@ -3019,9 +3019,22 @@ void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(m while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) { - if (node->tag == table->tag && node->type == EndTag) + if (node->tag == table->tag ) { - TY_(FreeNode)( doc, node); + if (node->type == EndTag) + { + TY_(FreeNode)(doc, node); + } + else + { + /* Issue #498 - If a
<table> in a <table>
+ * just close the current table, and issue a + * warning. The previous action was to discard + * this second <table>
+ */ + TY_(UngetToken)(doc); + TY_(ReportError)(doc, table, node, TAG_NOT_ALLOWED_IN); + } lexer->istackbase = istackbase; table->closed = yes; #if !defined(NDEBUG) && defined(_MSC_VER) From bb2cb2637223ea3c03edae01ae95be4e84b1b679 Mon Sep 17 00:00:00 2001 From: Jim Derry Date: Fri, 24 Feb 2017 11:58:30 -0500 Subject: [PATCH 09/14] Update README documents with information for the upcoming 5.4.0 release. --- README.md | 99 ++++++++++++++++++++-- README/ATTRIBUTES.md | 19 +++-- README/BRANCHES.md | 28 +++++++ README/BUILD.md | 66 +++++++++++++++ README/CODESTYLE.md | 12 +-- README/CONTRIBUTING.md | 40 +++++---- README/ELEMENTS.md | 19 ----- README/LOCALIZE.md | 4 +- README/MESSAGES.md | 1 - README/OPTIONS.md | 8 +- README/README.html | 181 +++++++++++++---------------------------- README/README.md | 102 ----------------------- README/TAGS.md | 19 +++++ README/VERSION.md | 14 ++-- localize/README.md | 2 +- 15 files changed, 318 insertions(+), 296 deletions(-) create mode 100644 README/BRANCHES.md create mode 100644 README/BUILD.md delete mode 100644 README/ELEMENTS.md delete mode 100644 README/README.md create mode 100644 README/TAGS.md diff --git a/README.md b/README.md index 8661afe..bf48c56 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,96 @@ -# HTML Tidy with HTML5 support +# HTACG HTML Tidy -All READMEs and related materials can be found in [README/][1]. +All other READMEs and related materials can be found in [README/][100]. Although all of our materials should be linked in this README, be sure to check this directory for documents we’ve not yet added to this document. -For build instructions please see [README/README.md][2]. +## Building HTML Tidy - [1]: https://github.com/htacg/tidy-html5/tree/master/README - [2]: https://github.com/htacg/tidy-html5/blob/master/README/README.md - \ No newline at end of file + - For build instructions please see [README/BUILD.md][115]. 
+ +## Branches and Versions + +Learn about which branches are available, which branch you should use, and how HTML Tidy’s versioning scheme works. + + - Learn about version numbering in [README/VERSION.md][160]. + - Learn about our repository branches in [README/BRANCHES.md][110]. + +## Contributing and Development Guides + +We gladly accept PRs! Read about some of our contribution guidelines, and check out some of the additional explanatory documents that will aid your understanding of how to accomplish certain things in HTML Tidy. + +### General Contribution Guidelines + +These are some general guidelines that will help you help us when it comes to making your own contributions to HTML Tidy. + + - Learn about our contributing guidelines in [README/CONTRIBUTING.md][125]. + - Understand HTML Tidy’s source code style in [README/CODESTYLE.md][120]. + +### Adding Features Guides + +When you’re ready to add a great new feature, these write-ups may be useful. + + - Learn how to add new element attributes to HTML Tidy by reading [README/ATTRIBUTES.md][105]. + - Discover how to add new tags to Tidy in [README/TAGS.md][130]. + - If you want to add new messages to Tidy, read [README/MESSAGES.md][150]. + - Configuration options can be added according to [README/OPTIONS.md][155]. + +### Language Localization Guides + +Tidy supports localization, and welcomes translations into various languages. Please read up on how to localize HTML Tidy. + + - The general README for localizing can be found in [/README/LOCALIZE.md][140]. + - And [/localize/README.md][145] contains specific instructions for localizing. 
+ + +## Other Important Links + + - site: [http://www.html-tidy.org/][4] + - source: [https://github.com/htacg/tidy-html5][5] + - binaries: [http://binaries.html-tidy.org][6] + - bugs: [https://github.com/htacg/tidy-html5/issues][7] + - list: [https://lists.w3.org/Archives/Public/html-tidy/][8] + - api and quickref: [http://api.html-tidy.org/][9] + + [4]: http://www.html-tidy.org/ + [5]: https://github.com/htacg/tidy-html5 + [6]: http://binaries.html-tidy.org + [7]: https://github.com/htacg/tidy-html5/issues + [8]: https://lists.w3.org/Archives/Public/html-tidy/ + [9]: http://api.html-tidy.org/ + + +## History + +This repository should be considered canonical for HTML Tidy as of 2015-January-15. + + - This repository originally transferred from [w3c.github.com/tidy-html5][20], now redirected to the current site. + + - First moved to Github from [tidy.sourceforge.net][21]. Note, this site is kept only for historic reasons, and is not now well maintained. + +**Tidy is the granddaddy of HTML tools, with support for modern standards.** Have fun... + + [20]: http://w3c.github.com/tidy-html5/ + [21]: http://tidy.sourceforge.net + + +## License + +HTML Tidy and LibTidy are free and open source software with a permissive license. + + - You can read the complete license in [README/LICENSE.md][135]. + + + + [100]: README/ + [105]: README/ATTRIBUTES.md + [110]: README/BRANCHES.md + [115]: README/BUILD.md + [120]: README/CODESTYLE.md + [125]: README/CONTRIBUTING.md + [130]: README/TAGS.md + [135]: README/LICENSE.md + [140]: /README/LOCALIZE.md + [145]: /localize/README.md + [150]: README/MESSAGES.md + [155]: README/OPTIONS.md + [160]: README/VERSION.md + diff --git a/README/ATTRIBUTES.md b/README/ATTRIBUTES.md index b4ef111..79ec167 100644 --- a/README/ATTRIBUTES.md +++ b/README/ATTRIBUTES.md @@ -1,21 +1,26 @@ # Tidy Element Attributes -This is about adding a **new** `attribute=value` for one or more html `element`, here called `tags`. 
+This is about adding a **new** HTML attribute to one or more HTML tags, i.e., a new attribute such as `attribute=value`. -Tidy supports a large number of `attributes`, first defined in `tidyenum.h`, to give it a value, then defined in `attrs.c` to give it a unique **string** name, and a `function` to verify the atrribute **value**. Then in `attrdict.c` the attribute is defined, giving what version(s) of html support this attribute. Finally, what tags support this attrinute, is done in `tags.c`, where each attribute is allowed on that tag, or not, in the `tag_defs[]` table. +Tidy’s large number of attributes are supported via a number of files: + + - `tidyenum.h` is where you first define a new attribute in order to give it an internal value. + - `attrs.c` is where you give a unique **string** name to the attribute, as well as a **function** to verify the **value**. + - `attrdict.c` further refines the definition of your attribute, specifying which version(s) of HTML support this attribute. + - `tags.c`, finally, determines which tags support the attribute, in the `tag_defs[]` table. So, to add a new `attribute=value`, on one or more existing tags, consists of the following simple steps - - 1. tidyenum.h - Give the attribute an internal name, like `TidyAttr_XXXX`, and thus a value. While there were some initial steps to keep this `TidyAttrId` enumeration alphabetic, now just add the new `TidyAttr_XXXX` just before the last entry 'N_TIDY_ATTRIBS'. + 1. `tidyenum.h` - Give the attribute an internal name, like `TidyAttr_XXXX`, and thus a value. While there were some initial steps to keep this `TidyAttrId` enumeration alphabetic, now just add the new `TidyAttr_XXXX` just before the last entry `N_TIDY_ATTRIBS`. - 2. attrs.c - Assign the string value of the attribute. Of course this must be unique. And then assign a `function` to verify the attribute value. 
There are already a considerable number of defined functions to verify specific attribute values, but maybe this new attribute requires a new function, so that should be written, and defined. + 2. `attrs.c` - Assign the string value of the attribute. Of course this must be unique. And then assign a `function` to verify the attribute value. There are already a considerable number of defined functions to verify specific attribute values, but maybe this new attribute requires a new function, so that should be written, and defined. - 3. attrdict.c - If this attribute only relates to specific `tags`, then it should be added to their list. There are some `general` attributes that are allowed on every, or most tags, so this new attribute and value should be added accordingly. + 3. `attrdict.c` - If this attribute only relates to specific tags, then it should be added to their list. There are some general attributes that are allowed on every, or most tags, so this new attribute and value should be added accordingly. - 4. tags.c - Now the new attribute will be verified for each tag it is associate with in the `tag_defs[]` table. Like for example the `