From 1c4d43ad2af65a82a8b2a2f085293bb47357f561 Mon Sep 17 00:00:00 2001 From: "Michael[tm] Smith" Date: Thu, 1 Mar 2012 17:22:03 +0900 Subject: [PATCH] Deal with version reporting better. --- Makefile | 8 +- README.html | 4 +- README.md | 9 +- VERSION | 1 - console/tidy.c | 33 +- htmldoc/quickref.html | 2322 +++++++++++++++++++++++++++++++++++++++++ htmldoc/tidy1.xsl | 73 +- src/lexer.c | 4 +- src/version.h | 15 +- src/version.h~ | 14 - 10 files changed, 2398 insertions(+), 85 deletions(-) delete mode 100644 VERSION create mode 100644 htmldoc/quickref.html mode change 100755 => 100644 src/version.h delete mode 100755 src/version.h~ diff --git a/Makefile b/Makefile index 74a3806..b48bf13 100644 --- a/Makefile +++ b/Makefile @@ -2,16 +2,17 @@ HTML2MARKDOWN=html2text GIT=git GITFLAGS= -all: bin/tidy README.md VERSION +all: README.md src/version.h bin/tidy bin/tidy: $(MAKE) -C build/gmake + $(MAKE) -C build/gmake doc README.md: README.html $(HTML2MARKDOWN) $(HTML2MARKDOWNFLAGS) $< > $@ -VERSION: - $(GIT) $(GITFLAGS) log --pretty=format:'https://github.com/w3c/tidy-html5/tree/%h' -n 1 > $@ +src/version.h: + $(GIT) $(GITFLAGS) log --pretty=format:'static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/%h";' -n 1 > $@ install: sudo $(MAKE) install -C build/gmake @@ -19,3 +20,4 @@ install: clean: $(MAKE) clean -C build/gmake $(RM) README.md + $(RM) src/version.h diff --git a/README.html b/README.html index 14be41b..e898fcc 100644 --- a/README.html +++ b/README.html @@ -7,8 +7,8 @@ -

Tidy for HTML5

-

This repo is a fork of the code from +

HTML Tidy for HTML5 (experimental)

+

This repo is an experimental fork of the code from tidy.sourceforge.net. This source code in this version supports processing of HTML5 documents. The changes for HTML5 support started from a diff --git a/README.md b/README.md index a2d3174..3216fc0 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ -# Tidy for HTML5 +# HTML Tidy for HTML5 (experimental) -This repo is a fork of the code from [tidy.sourceforge.net][1]. This source -code in this version supports processing of HTML5 documents. The changes for -HTML5 support started from a [patch developed by Björn Höhrmann][2]. +This repo is an experimental fork of the code from [tidy.sourceforge.net][1]. +This source code in this version supports processing of HTML5 documents. The +changes for HTML5 support started from a [patch developed by Björn +Höhrmann][2]. [1]: http://tidy.sourceforge.net diff --git a/VERSION b/VERSION deleted file mode 100644 index 3872684..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -https://github.com/w3c/tidy-html5/tree/35f2516 diff --git a/console/tidy.c b/console/tidy.c index 494abc0..2a90fc6 100644 --- a/console/tidy.c +++ b/console/tidy.c @@ -416,14 +416,16 @@ static void help( ctmbstr prog ) { printf( "%s [option...] [file...] [option...] 
[file...]\n", prog ); printf( "Utility to clean up and pretty print HTML/XHTML/XML\n"); - printf( "See http://tidy.sourceforge.net/\n"); + printf( "\n"); + + printf( "This is an HTML5-aware experimental fork of HTML Tidy.\n"); + printf( "%s\n", tidyReleaseDate() ); printf( "\n"); #ifdef PLATFORM_NAME - printf( "Options for HTML Tidy for %s from https://github.com/w3c/tidy-html5\n", - PLATFORM_NAME); + printf( "Options for HTML Tidy for %s:\n", PLATFORM_NAME ); #else - printf( "Options for HTML Tidy from https://github.com/w3c/tidy-html5\n"); + printf( "Options for HTML Tidy:\n"); #endif printf( "\n"); @@ -434,9 +436,24 @@ static void help( ctmbstr prog ) "to the man page.\n\n"); printf( "Input/Output default to stdin/stdout respectively.\n"); + printf( "\n"); printf( "Single letter options apart from -f may be combined\n"); printf( "as in: tidy -f errs.txt -imu foo.html\n"); - printf( "For further info on HTML see http://www.w3.org/MarkUp\n"); + printf( "\n"); + printf( "For more information on HTML, see the following:\n" ); + printf( "\n"); + printf( " HTML: Edition for Web Authors (the latest HTML specification)\n"); + printf( " http://dev.w3.org/html5/spec-author-view\n" ); + printf( "\n"); + printf( " HTML: The Markup Language (an HTML language reference)\n" ); + printf( " http://dev.w3.org/html5/markup/\n" ); + printf( "\n"); + printf( "File bug reports at https://github.com/w3c/tidy-html5/issues/\n" ); + printf( "or send questions and comments to html-tidy@w3.org\n" ); + printf( "\n"); + printf( "Validate your HTML documents using the W3C Nu Markup Validator:\n" ); + printf( "\n"); + printf( " http://validator.w3.org/nu/" ); printf( "\n"); } @@ -903,10 +920,10 @@ static void optionvalues( TidyDoc tdoc ) static void version( void ) { #ifdef PLATFORM_NAME - printf( "HTML Tidy for %s from https://github.com/w3c/tidy-html5\n", - PLATFORM_NAME); + printf( "HTML Tidy for HTML5 (experimental) for %s %s\n", + PLATFORM_NAME, tidyReleaseDate() ); #else - printf( "HTML 
Tidy from https://github.com/w3c/tidy-html5\n"); + printf( "HTML Tidy for HTML5 (experimental) %s\n", tidyReleaseDate() ); #endif } diff --git a/htmldoc/quickref.html b/htmldoc/quickref.html new file mode 100644 index 0000000..6d7671a --- /dev/null +++ b/htmldoc/quickref.html @@ -0,0 +1,2322 @@ + + + + + HTML Tidy Configuration Options Quick Reference + + + +

Quick Reference

+

HTML Tidy Configuration Options

+

+ Generated automatically with HTML Tidy released + on https://github.com/w3c/tidy-html5/tree/82bada2. +

+

+ HTML, XHTML, XML +
+ Diagnostics +
+ Pretty Print +
+ Character Encoding +
+ Miscellaneous +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
HTML, XHTML, XML Options + Top +
OptionTypeDefault
+ add-xml-decl + Booleanno
+ add-xml-space + Booleanno
+ alt-text + String + - +
+ anchor-as-name + Booleanyes
+ assume-xml-procins + Booleanno
+ bare + Booleanno
+ clean + Booleanno
+ css-prefix + String + - +
+ decorate-inferred-ul + Booleanno
+ doctype + DocTypeauto
+ drop-empty-paras + Booleanyes
+ drop-font-tags + Booleanno
+ drop-proprietary-attributes + Booleanno
+ enclose-block-text + Booleanno
+ enclose-text + Booleanno
+ escape-cdata + Booleanno
+ fix-backslash + Booleanyes
+ fix-bad-comments + Booleanyes
+ fix-uri + Booleanyes
+ hide-comments + Booleanno
+ hide-endtags + Booleanno
+ indent-cdata + Booleanno
+ input-xml + Booleanno
+ join-classes + Booleanno
+ join-styles + Booleanyes
+ literal-attributes + Booleanno
+ logical-emphasis + Booleanno
+ lower-literals + Booleanyes
+ merge-divs + AutoBoolauto
+ merge-spans + AutoBoolauto
+ ncr + Booleanyes
+ new-blocklevel-tags + Tag names + - +
+ new-empty-tags + Tag names + - +
+ new-inline-tags + Tag names + - +
+ new-pre-tags + Tag names + - +
+ numeric-entities + Booleanno
+ output-html + Booleanno
+ output-xhtml + Booleanno
+ output-xml + Booleanno
+ preserve-entities + Booleanno
+ quote-ampersand + Booleanyes
+ quote-marks + Booleanno
+ quote-nbsp + Booleanyes
+ repeated-attributes + enumkeep-last
+ replace-color + Booleanno
+ show-body-only + AutoBoolno
+ uppercase-attributes + Booleanno
+ uppercase-tags + Booleanno
+ word-2000 + Booleanno
 
Diagnostics Options + Top +
OptionTypeDefault
+ accessibility-check + enum0 (Tidy Classic)
+ show-errors + Integer6
+ show-warnings + Booleanyes
 
Pretty Print Options + Top +
OptionTypeDefault
+ break-before-br + Booleanno
+ indent + AutoBoolno
+ indent-attributes + Booleanno
+ indent-spaces + Integer2
+ markup + Booleanyes
+ punctuation-wrap + Booleanno
+ sort-attributes + enumnone
+ split + Booleanno
+ tab-size + Integer8
+ vertical-space + Booleanno
+ wrap + Integer68
+ wrap-asp + Booleanyes
+ wrap-attributes + Booleanno
+ wrap-jste + Booleanyes
+ wrap-php + Booleanyes
+ wrap-script-literals + Booleanno
+ wrap-sections + Booleanyes
 
Character Encoding Options + Top +
OptionTypeDefault
+ ascii-chars + Booleanno
+ char-encoding + Encodingutf8
+ input-encoding + Encodingutf8
+ language + String + - +
+ newline + enum + Platform dependent +
+ output-bom + AutoBoolauto
+ output-encoding + Encodingutf8
 
Miscellaneous Options + Top +
OptionTypeDefault
+ error-file + String + - +
+ force-output + Booleanno
+ gnu-emacs + Booleanno
+ gnu-emacs-file + String + - +
+ keep-time + Booleanno
+ output-file + String + - +
+ quiet + Booleanno
+ slide-style + String + - +
+ tidy-mark + Booleanyes
+ write-back + Booleanno
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 
HTML, XHTML, XML Options Reference +
 
add-xml-decl
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
+ char-encoding +
+ output-encoding +
This option specifies if Tidy should add the XML declaration when outputting XML or XHTML. Note that if the input already includes an <?xml ... ?> declaration then this option will be ignored. If the encoding for the output is different from "ascii", one of the utf encodings or "raw", the declaration is always added as required by the XML standard.
 
add-xml-space
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should add xml:space="preserve" to elements such as <PRE>, <STYLE> and <SCRIPT> when generating XML. This is needed if the whitespace in such elements is to be parsed appropriately without having access to the DTD.
 
alt-text
Type: String
+ Default: -
Example: -
This option specifies the default "alt=" text Tidy uses for <IMG> attributes. This feature is dangerous as it suppresses further accessibility warnings. You are responsible for making your documents accessible to people who can not see the images!
 
anchor-as-name
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option controls the deletion or addition of the name attribute in elements where it can serve as an anchor. If set to "yes", a name attribute, if not already existing, is added alongside an existing id attribute if the DTD allows it. If set to "no", any existing name attribute is removed if an id attribute exists or has been added.
 
assume-xml-procins
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should change the parsing of processing instructions to require ?> as the terminator rather than >. This option is automatically set if the input is in XML.
 
bare
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should strip Microsoft specific HTML from Word 2000 documents, and output spaces rather than non-breaking spaces where they exist in the input.
 
clean
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
+ drop-font-tags +
This option specifies if Tidy should strip out surplus presentational tags and attributes replacing them by style rules and structural markup as appropriate. It works well on the HTML saved by Microsoft Office products.
 
css-prefix
Type: String
+ Default: -
Example: -
This option specifies the prefix that Tidy uses for style rules. By default, "c" will be used.
 
decorate-inferred-ul
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should decorate inferred UL elements with some CSS markup to avoid indentation to the right.
 
doctype
Type: DocType
+ Default: auto
Example: omit, auto, strict, transitional, user
This option specifies the DOCTYPE declaration generated by Tidy. If set to "omit" the output won't contain a DOCTYPE declaration. If set to "auto" (the default) Tidy will use an educated guess based upon the contents of the document. If set to "strict", Tidy will set the DOCTYPE to the strict DTD. If set to "loose", the DOCTYPE is set to the loose (transitional) DTD. Alternatively, you can supply a string for the formal public identifier (FPI).

For example:
doctype: "-//ACME//DTD HTML 3.14159//EN"

If you specify the FPI for an XHTML document, Tidy will set the system identifier to an empty string. For an HTML document, Tidy adds a system identifier only if one was already present in order to preserve the processing mode of some browsers. Tidy leaves the DOCTYPE for generic XML documents unchanged. --doctype omit implies --numeric-entities yes. This option does not offer a validation of the document conformance.
 
drop-empty-paras
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should discard empty paragraphs.
 
drop-font-tags
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
+ clean +
This option specifies if Tidy should discard <FONT> and <CENTER> tags without creating the corresponding style rules. This option can be set independently of the clean option.
 
drop-proprietary-attributes
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should strip out proprietary attributes, such as MS data binding attributes.
 
enclose-block-text
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should insert a <P> element to enclose any text it finds in any element that allows mixed content for HTML transitional but not HTML strict.
 
enclose-text
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should enclose any text it finds in the body element within a <P> element. This is useful when you want to take existing HTML and use it with a style sheet.
 
escape-cdata
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should convert <![CDATA[]]> sections to normal text.
 
fix-backslash
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should replace backslash characters "\" in URLs by forward slashes "/".
 
fix-bad-comments
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should replace unexpected hyphens with "=" characters when it comes across adjacent hyphens. The default is yes. This option is provided for users of Cold Fusion which uses the comment syntax: <!--- --->
 
fix-uri
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should check attribute values that carry URIs for illegal characters and if such are found, escape them as HTML 4 recommends.
 
hide-comments
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should omit comments from the output.
 
hide-endtags
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should omit optional end-tags when generating the pretty printed markup. This option is ignored if you are outputting to XML.
 
indent-cdata
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should indent <![CDATA[]]> sections.
 
input-xml
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should use the XML parser rather than the error correcting HTML parser.
 
join-classes
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
+ join-styles +
+ repeated-attributes +
This option specifies if Tidy should combine class names to generate a single new class name, if multiple class assignments are detected on an element.
 
join-styles
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
+ join-classes +
+ repeated-attributes +
This option specifies if Tidy should combine styles to generate a single new style, if multiple style values are detected on an element.
 
literal-attributes
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should ensure that whitespace characters within attribute values are passed through unchanged.
 
logical-emphasis
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should replace any occurrence of <I> by <EM> and any occurrence of <B> by <STRONG>. In both cases, the attributes are preserved unchanged. This option can be set independently of the clean and drop-font-tags options.
 
lower-literals
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should convert the value of an attribute that takes a list of predefined values to lower case. This is required for XHTML documents.
 
merge-divs
Type: AutoBool
+ Default: auto
Example: auto, y/n, yes/no, t/f, true/false, 1/0
+ clean +
+ merge-spans +
Can be used to modify behavior of -c (--clean yes) option. This option specifies if Tidy should merge nested <div> such as "<div><div>...</div></div>". If set to "auto", the attributes of the inner <div> are moved to the outer one. As well, nested <div> with ID attributes are not merged. If set to "yes", the attributes of the inner <div> are discarded with the exception of "class" and "style".
 
merge-spans
Type: AutoBool
+ Default: auto
Example: auto, y/n, yes/no, t/f, true/false, 1/0
+ clean +
+ merge-divs +
Can be used to modify behavior of -c (--clean yes) option. This option specifies if Tidy should merge nested <span> such as "<span><span>...</span></span>". The algorithm is identical to the one used by --merge-divs.
 
ncr
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should allow numeric character references.
 
new-blocklevel-tags
Type: Tag names
+ Default: -
Example: tagX, tagY, ...
+ new-empty-tags +
+ new-inline-tags +
+ new-pre-tags +
This option specifies new block-level tags. This option takes a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. Note you can't change the content model for elements such as <TABLE>, <UL>, <OL> and <DL>. This option is ignored in XML mode.
 
new-empty-tags
Type: Tag names
+ Default: -
Example: tagX, tagY, ...
+ new-blocklevel-tags +
+ new-inline-tags +
+ new-pre-tags +
This option specifies new empty inline tags. This option takes a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. Remember to also declare empty tags as either inline or blocklevel. This option is ignored in XML mode.
 
new-inline-tags
Type: Tag names
+ Default: -
Example: tagX, tagY, ...
+ new-blocklevel-tags +
+ new-empty-tags +
+ new-pre-tags +
This option specifies new non-empty inline tags. This option takes a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. This option is ignored in XML mode.
 
new-pre-tags
Type: Tag names
+ Default: -
Example: tagX, tagY, ...
+ new-blocklevel-tags +
+ new-empty-tags +
+ new-inline-tags +
This option specifies new tags that are to be processed in exactly the same way as HTML's <PRE> element. This option takes a space or comma separated list of tag names. Unless you declare new tags, Tidy will refuse to generate a tidied file if the input includes previously unknown tags. Note you can not as yet add new CDATA elements (similar to <SCRIPT>). This option is ignored in XML mode.
 
numeric-entities
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
+ doctype +
+ preserve-entities +
This option specifies if Tidy should output entities other than the built-in HTML entities (&amp;, &lt;, &gt; and &quot;) in the numeric rather than the named entity form. Only entities compatible with the DOCTYPE declaration generated are used. Entities that can be represented in the output encoding are translated correspondingly.
 
output-html
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should generate pretty printed output, writing it as HTML.
 
output-xhtml
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should generate pretty printed output, writing it as extensible HTML. This option causes Tidy to set the DOCTYPE and default namespace as appropriate to XHTML. If a DOCTYPE or namespace is given they will be checked for consistency with the content of the document. In the case of an inconsistency, the corrected values will appear in the output. For XHTML, entities can be written as named or numeric entities according to the setting of the "numeric-entities" option. The original case of tags and attributes will be preserved, regardless of other options.
 
output-xml
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should pretty print output, writing it as well-formed XML. Any entities not defined in XML 1.0 will be written as numeric entities to allow them to be parsed by an XML parser. The original case of tags and attributes will be preserved, regardless of other options.
 
preserve-entities
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should preserve the well-formed entities as found in the input.
 
quote-ampersand
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should output unadorned & characters as &amp;.
 
quote-marks
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should output " characters as &quot; as is preferred by some editing environments. The apostrophe character ' is written out as &#39; since many web browsers don't yet support &apos;.
 
quote-nbsp
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should output non-breaking space characters as entities, rather than as the Unicode character value 160 (decimal).
 
repeated-attributes
Type: enum
+ Default: keep-last
Example: keep-first, keep-last
+ join-classes +
+ join-styles +
This option specifies if Tidy should keep the first or last attribute, if an attribute is repeated, e.g. has two align attributes.
 
replace-color
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should replace numeric values in color attributes by HTML/XHTML color names where defined, e.g. replace "#ffffff" with "white".
 
show-body-only
Type: AutoBool
+ Default: no
Example: auto, y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should print only the contents of the body tag as an HTML fragment. If set to "auto", this is performed only if the body tag has been inferred. Useful for incorporating existing whole pages as a portion of another page. This option has no effect if XML output is requested.
 
uppercase-attributes
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should output attribute names in upper case. The default is no, which results in lower case attribute names, except for XML input, where the original case is preserved.
 
uppercase-tags
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should output tag names in upper case. The default is no, which results in lower case tag names, except for XML input, where the original case is preserved.
 
word-2000
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should go to great pains to strip out all the surplus stuff Microsoft Word 2000 inserts when you save Word documents as "Web pages". Doesn't handle embedded images or VML. You should consider using Word's "Save As: Web Page, Filtered".
 
 
Diagnostics Options Reference +
 
accessibility-check
Type: enum
+ Default: 0 (Tidy Classic)
Example: 0 (Tidy Classic), 1 (Priority 1 Checks), 2 (Priority 2 Checks), 3 (Priority 3 Checks)
This option specifies what level of accessibility checking, if any, that Tidy should do. Level 0 is equivalent to Tidy Classic's accessibility checking. For more information on Tidy's accessibility checking, visit the Adaptive Technology Resource Centre at the University of Toronto.
 
show-errors
Type: Integer
+ Default: 6
Example: 0, 1, 2, ...
This option specifies the number Tidy uses to determine if further errors should be shown. If set to 0, then no errors are shown.
 
show-warnings
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should show warnings. Setting it to "no" suppresses them, which can be useful when a few errors are hidden in a flurry of warnings.
 
 
Pretty Print Options Reference +
 
break-before-br
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should output a line break before each <BR> element.
 
indent
Type: AutoBool
+ Default: no
Example: auto, y/n, yes/no, t/f, true/false, 1/0
+ indent-spaces +
This option specifies if Tidy should indent block-level tags. If set to "auto", this option causes Tidy to decide whether or not to indent the content of tags such as TITLE, H1-H6, LI, TD, TH, or P depending on whether or not the content includes a block-level element. You are advised to avoid setting indent to yes as this can expose layout bugs in some browsers.
 
indent-attributes
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should begin each attribute on a new line.
 
indent-spaces
Type: Integer
+ Default: 2
Example: 0, 1, 2, ...
+ indent +
This option specifies the number of spaces Tidy uses to indent content, when indentation is enabled.
 
markup
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should generate a pretty printed version of the markup. Note that Tidy won't generate a pretty printed version if it finds significant errors (see force-output).
 
punctuation-wrap
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should line wrap after some Unicode or Chinese punctuation characters.
 
sort-attributes
Type: enum
+ Default: none
Example: none, alpha
This option specifies that tidy should sort attributes within an element using the specified sort algorithm. If set to "alpha", the algorithm is an ascending alphabetic sort.
 
split
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
Currently not used. Tidy Classic only.
 
tab-size
Type: Integer
+ Default: 8
Example: 0, 1, 2, ...
This option specifies the number of columns that Tidy uses between successive tab stops. It is used to map tabs to spaces when reading the input. Tidy never outputs tabs.
 
vertical-space
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should add some empty lines for readability.
 
wrap
Type: Integer
+ Default: 68
Example: 0 (no wrapping), 1, 2, ...
This option specifies the right margin Tidy uses for line wrapping. Tidy tries to wrap lines so that they do not exceed this length. Set wrap to zero if you want to disable line wrapping.
 
wrap-asp
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should line wrap text contained within ASP pseudo elements, which look like: <% ... %>.
 
wrap-attributes
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
+ wrap-script-literals +
This option specifies if Tidy should line wrap attribute values, for easier editing. This option can be set independently of wrap-script-literals.
 
wrap-jste
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should line wrap text contained within JSTE pseudo elements, which look like: <# ... #>.
 
wrap-php
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should line wrap text contained within PHP pseudo elements, which look like: <?php ... ?>.
 
wrap-script-literals
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
+ wrap-attributes +
This option specifies if Tidy should line wrap string literals that appear in script attributes. Tidy wraps long script string literals by inserting a backslash character before the line break.
 
wrap-sections
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should line wrap text contained within <![ ... ]> section tags.
 
 
Character Encoding Options Reference +
 
ascii-chars
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
+ clean +
Can be used to modify behavior of -c (--clean yes) option. If set to "yes" when using -c, &mdash;, &rdquo;, and other named character entities are downgraded to their closest ASCII equivalents.
 
char-encoding
Type: Encoding
+ Default: utf8
Example: raw, ascii, latin0, latin1, utf8, iso2022, mac, win1252, ibm858, utf16le, utf16be, utf16, big5, shiftjis
+ input-encoding +
+ output-encoding +
This option specifies the character encoding Tidy uses for both the input and output. For ascii, Tidy will accept Latin-1 (ISO-8859-1) character values, but will use entities for all characters whose value > 127. For raw, Tidy will output values above 127 without translating them into entities. For latin1, characters above 255 will be written as entities. For utf8, Tidy assumes that both input and output are encoded as UTF-8. You can use iso2022 for files encoded using the ISO-2022 family of encodings, e.g. ISO-2022-JP. For mac and win1252, Tidy will accept vendor-specific character values, but will use entities for all characters whose value > 127. For unsupported encodings, use an external utility to convert to and from UTF-8.
 
input-encoding
Type: Encoding
+ Default: utf8
Example: raw, ascii, latin0, latin1, utf8, iso2022, mac, win1252, ibm858, utf16le, utf16be, utf16, big5, shiftjis
+ char-encoding +
This option specifies the character encoding Tidy uses for the input. See char-encoding for more info.
 
language
Type: String
+ Default: -
Example: -
Currently not used, but this option specifies the language Tidy uses (for instance "en").
 
newline
Type: enum
+ Default: Platform dependent
Example: LF, CRLF, CR
The default is appropriate to the current platform: CRLF on PC-DOS, MS-Windows and OS/2, CR on Classic Mac OS, and LF everywhere else (Unix and Linux).
 
output-bom
Type: AutoBool
+ Default: auto
Example: auto, y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should write a Unicode Byte Order Mark character (BOM; also known as Zero Width No-Break Space; has value of U+FEFF) to the beginning of the output; only for UTF-8 and UTF-16 output encodings. If set to "auto", this option causes Tidy to write a BOM to the output only if a BOM was present at the beginning of the input. A BOM is always written for XML/XHTML output using UTF-16 output encodings.
 
output-encoding
Type: Encoding
+ Default: utf8
Example: raw, ascii, latin0, latin1, utf8, iso2022, mac, win1252, ibm858, utf16le, utf16be, utf16, big5, shiftjis
+ char-encoding +
This option specifies the character encoding Tidy uses for the output. See char-encoding for more info. May only be different from input-encoding for Latin encodings (ascii, latin0, latin1, mac, win1252, ibm858).
 
 
Miscellaneous Options Reference +
 
error-file
Type: String
+ Default: -
Example: -
+ output-file +
This option specifies the error file Tidy uses for errors and warnings. Normally errors and warnings are output to "stderr".
 
force-output
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should produce output even if errors are encountered. Use this option with care - if Tidy reports an error, this means Tidy was not able to, or is not sure how to, fix the error, so the resulting output may not reflect your intention.
 
gnu-emacs
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should change the format for reporting errors and warnings to a format that is more easily parsed by GNU Emacs.
 
gnu-emacs-file
Type: String
+ Default: -
Example: -
Used internally.
 
keep-time
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should keep the original modification time of files that Tidy modifies in place. The default is no. Setting the option to yes allows you to tidy files without causing these files to be uploaded to a web server when using a tool such as SiteCopy. Note this feature is not supported on some platforms.
 
output-file
Type: String
+ Default: -
Example: -
+ error-file +
This option specifies the output file Tidy uses for markup. Normally markup is written to "stdout".
 
quiet
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should suppress the summary of the numbers of errors and warnings, as well as the welcome and informational messages.
 
slide-style
Type: String
+ Default: -
Example: -
Currently not used. Tidy Classic only.
 
tidy-mark
Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should add a meta element to the document head to indicate that the document has been tidied. Tidy won't add a meta element if one is already present.
 
write-back
Type: Boolean
+ Default: no
Example: y/n, yes/no, t/f, true/false, 1/0
This option specifies if Tidy should write back the tidied markup to the same file it read from. You are advised to keep copies of important files before tidying them, as on rare occasions the result may not be what you expect.
 
+ + diff --git a/htmldoc/tidy1.xsl b/htmldoc/tidy1.xsl index 9fa8235..eda8156 100644 --- a/htmldoc/tidy1.xsl +++ b/htmldoc/tidy1.xsl @@ -47,8 +47,8 @@ - .\" tidy man page for the HTML5 for of Tidy -.TH TIDY 1 "https://github.com/w3c/tidy-html5" "HTML Tidy" "https://github.com/w3c/tidy-html5" + .\" tidy man page for the HTML5 fork of Tidy +.TH TIDY 1 "" "HTML Tidy" "" @@ -78,37 +78,6 @@ Input/Output default to stdin/stdout respectively. Single letter options apart f .LP .in 1i \fBtidy -f errs.txt -imu foo.html\fR -.LP -For more information on HTML, see the following: -.RS 4 -.LP -\fBHTML: Edition for Web Authors\fR (the latest HTML specification) -.br -http://dev.w3.org/html5/spec-author-view -.LP -\fBHTML: The Markup Language\fR (an HTML language reference) -.br -http://dev.w3.org/html5/markup/ -.RE -.LP -For more information about HTML Tidy, see: -.RS 4 -.LP -https://github.com/w3c/tidy-html5 -.RE -.LP -For bug reports and comments, see: -.RS 4 -.LP -https://github.com/w3c/tidy-html5/issues/ -...or send questions and comments to \fBhtml-tidy@w3.org\fR -.RE -.LP -Validate your HTML documents using the \fBW3C Nu Markup Validator\fR: -.RS 4 -.LP -http://validator.w3.org/nu/ -.RE .SH ENVIRONMENT .TP .B HTML_TIDY @@ -384,12 +353,42 @@ appearing in content with another backslash. 
+.SH SEE ALSO +For more information on HTML: +.RS 4 +.LP +\fBHTML: Edition for Web Authors\fR (the latest HTML specification) +.br +http://dev.w3.org/html5/spec-author-view +.LP +\fBHTML: The Markup Language\fR (an HTML language reference) +.br +http://dev.w3.org/html5/markup/ +.RE +.LP +For more information about the experimental HTML5 fork of HTML Tidy: +.RS 4 +.LP +https://github.com/w3c/tidy-html5 +.RE +.LP +For bug reports and comments: +.RS 4 +.LP +https://github.com/w3c/tidy-html5/issues/ +.RE +.LP +Or send questions and comments to \fBhtml-tidy@w3.org\fR +.LP +Validate your HTML documents using the \fBW3C Nu Markup Validator\fR: +.RS 4 +.LP +http://validator.w3.org/nu/ +.RE .SH AUTHOR -\fBTidy\fR was written by \fBDave Raggett\fR <dsr@w3.org>, and was for a long while maintained by a team at http://tidy.sourceforge.net/ +\fBTidy\fR was written by \fBDave Raggett\fR <dsr@w3.org>, and subsequently maintained by a team at http://tidy.sourceforge.net/ .LP -The HTML5 fork of \fBTidy\fR is at https://github.com/w3c/tidy-html5/ -.LP -The sources for \fBTidy\fR are available under the MIT Licence. +The sources for the HTML5 fork of \fBTidy\fR are available at https://github.com/w3c/tidy-html5/ under the MIT Licence. 
diff --git a/src/lexer.c b/src/lexer.c index ec759cf..8d111a4 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1408,10 +1408,10 @@ Bool TY_(AddGenerator)( TidyDocImpl* doc ) if (head) { #ifdef PLATFORM_NAME - TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for "PLATFORM_NAME" from https://github.com/w3c/tidy-html5", + TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for "PLATFORM_NAME" %s", tidyReleaseDate()); #else - TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy from https://github.com/w3c/tidy-html5"); + TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy %s", tidyReleaseDate()); #endif for ( node = head->content; node; node = node->next ) diff --git a/src/version.h b/src/version.h old mode 100755 new mode 100644 index 2d5f06b..43a970a --- a/src/version.h +++ b/src/version.h @@ -1,14 +1 @@ -/* version information - - (c) 2007-2009 (W3C) MIT, ERCIM, Keio University - See tidy.h for the copyright notice. - - CVS Info : - - $Author: arnaud02 $ - $Date: 2009/03/25 21:37:11 $ - $Revision: 1.46 $ - -*/ - -static const char TY_(release_date)[] = "16 November 2011"; +static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/82bada2"; \ No newline at end of file diff --git a/src/version.h~ b/src/version.h~ deleted file mode 100755 index b606ab2..0000000 --- a/src/version.h~ +++ /dev/null @@ -1,14 +0,0 @@ -/* version information - - (c) 2007-2009 (W3C) MIT, ERCIM, Keio University - See tidy.h for the copyright notice. - - CVS Info : - - $Author: arnaud02 $ - $Date: 2009/03/25 21:37:11 $ - $Revision: 1.46 $ - -*/ - -static const char TY_(release_date)[] = "25 March 2009";