diff --git a/CMakeLists.txt b/CMakeLists.txt
index 862ca31..d3a7f0f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,8 @@
cmake_minimum_required (VERSION 2.8.7)
set(LIB_NAME tidy)
+set(LIBTIDY_DESCRIPTION "${LIB_NAME} - HTML syntax checker")
+set(LIBTIDY_URL "http://www.html-tidy.org")
project (${LIB_NAME})
@@ -203,7 +205,7 @@ if (NOT BIN_INSTALL_DIR)
endif ()
if (NOT INCLUDE_INSTALL_DIR)
- set(INCLUDE_INSTALL_DIR include)
+ set(INCLUDE_INSTALL_DIR include/${LIB_NAME})
endif ()
# Always build the STATIC library
@@ -363,7 +365,7 @@ if (WIN32)
set(CPACK_SOURCE_GENERATOR "ZIP")
set(CPACK_WIX_UPGRADE_GUID "D809598A-B513-4752-B268-0BAC403B00E4")
elseif ( ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" )
- set(CPACK_GENERATOR "PackageMake")
+ set(CPACK_GENERATOR "productbuild") # NOTE(review): productbuild needs a newer CPack than the 2.8.7 minimum declared above - confirm
set(CPACK_SOURCE_GENERATOR "TGZ")
else ()
set(CPACK_GENERATOR "DEB;RPM")
@@ -371,7 +373,7 @@ else ()
endif ()
set(CPACK_PACKAGE_NAME "${LIB_NAME}")
-set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${LIB_NAME} - HTML syntax checker")
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${LIBTIDY_DESCRIPTION}")
set(CPACK_PACKAGE_VENDOR "HTML Tidy Advocacy Community Group")
set(CPACK_PACKAGE_CONTACT "maintainer@htacg.org")
@@ -387,7 +389,7 @@ set(CPACK_RESOURCE_FILE_WELCOME "${CMAKE_CURRENT_SOURCE_DIR}/README/README.html"
## debian config
set(CPACK_DEBIAN_PACKAGE_MAINTAINER ${CPACK_PACKAGE_CONTACT})
-set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "http://www.html-tidy.org/")
+set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${LIBTIDY_URL})
#set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc")
set(CPACK_DEBIAN_PACKAGE_SECTION "Libraries")
set(CPACK_SOURCE_IGNORE_FILES
@@ -405,4 +407,15 @@ endif ()
include(CPack)
+# pkg-config: fill in ${LIB_NAME}.pc.cmake.in (@ONLY: only @VAR@ is substituted) and install the result
+configure_file(
+ "${CMAKE_CURRENT_SOURCE_DIR}/${LIB_NAME}.pc.cmake.in"
+ "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}.pc"
+ @ONLY
+ )
+install(FILES
+ "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}.pc"
+ DESTINATION "${LIB_INSTALL_DIR}/pkgconfig"
+ )
+
# eof
diff --git a/README/BUILD.md b/README/BUILD.md
index 6ae5bbb..e427752 100644
--- a/README/BUILD.md
+++ b/README/BUILD.md
@@ -40,6 +40,14 @@ If you do **not** need the tidy library built as a 'shared' (DLL) library, then
See the `CMakeLists.txt` file for other CMake **options** offered.
+## Build the tidy packages
+
+ 1. `cd build/cmake`
+
+ 2. `cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr`
+
+ 3. Unix/OS X: `make package`
+
## Build PHP with the tidy-html5 library
Due to API changes in the PHP source, `buffio.h` needs to be renamed to `tidybuffio.h` in the file `ext/tidy/tidy.c` in PHP's source.
diff --git a/build/cmake/.gitignore b/build/cmake/.gitignore
index 2539213..23b5a7b 100644
--- a/build/cmake/.gitignore
+++ b/build/cmake/.gitignore
@@ -30,5 +30,6 @@ _CPack_Packages/*
tidy
libtidy*
tidy1.xsl
+tidy.pc
*.vcproj
-
+*.pkg
diff --git a/build/cmake/cmake-clean.txt b/build/cmake/cmake-clean.txt
index 36eb531..c57f3ae 100644
--- a/build/cmake/cmake-clean.txt
+++ b/build/cmake/cmake-clean.txt
@@ -27,4 +27,4 @@ libtidy.so.5.0.0
tidy.1
_CPack_Packages
tidy1.xsl
-
+tidy.pc
diff --git a/include/tidyenum.h b/include/tidyenum.h
index 567f1ba..565eee9 100644
--- a/include/tidyenum.h
+++ b/include/tidyenum.h
@@ -630,6 +630,7 @@ typedef enum
TidyUpperCaseTags, /**< Output tags in upper not lower case */
TidyUseCustomTags, /**< Enable Tidy to use autonomous custom tags */
TidyVertSpace, /**< degree to which markup is spread out vertically */
+ TidyWarnPropAttrs, /**< Warns on proprietary attributes */
TidyWord2000, /**< Draconian cleaning for Word2000 */
TidyWrapAsp, /**< Wrap within ASP pseudo elements */
TidyWrapAttVals, /**< Wrap within attribute values */
diff --git a/src/access.c b/src/access.c
index f882e8c..ac3b9aa 100644
--- a/src/access.c
+++ b/src/access.c
@@ -131,6 +131,49 @@ static void MetaDataPresent( TidyDocImpl* doc, Node* node );
static void CheckEmbed( TidyDocImpl* doc, Node* node );
static void CheckListUsage( TidyDocImpl* doc, Node* node );
+/*
+   IsFilePath reports whether the URI in path names a resource (it has a
+   path after the host, or is a relative reference) rather than a bare
+   host such as http://sample.com.au, whose TLD resembles a file extension.
+*/
+static Bool IsFilePath( ctmbstr path )
+{
+    const char *p = path;
+    char c;
+    typedef enum states { initial, protocol_found, slash_found, file_found } states;
+    states state = initial;
+
+    while ( ( c = *p++ ) != 0 && state != file_found )
+    {
+        switch ( state )
+        {
+        case initial:   /* no scheme delimiter seen yet */
+            if ( c == ':' )
+                state = protocol_found;
+            else if ( c == '/' || c == '?' || c == '#' )
+                return yes;   /* relative reference: a later ':' is not a scheme */
+            break;
+        case protocol_found:   /* after "scheme:" or inside the host part */
+            if ( c =='/' )
+                state = slash_found;
+            break;
+        case slash_found:   /* a second '/' opens the host; anything else is a path */
+            if ( c =='/' )
+                state = protocol_found;
+            else
+                state = file_found;
+            break;
+
+        default:
+            break;
+        }
+
+    }
+
+    return state == file_found || state == initial;
+}
+
+
/*
GetFileExtension takes a path and returns the extension
portion of the path (if any).
@@ -163,9 +206,10 @@ static void GetFileExtension( ctmbstr path, tmbchar *ext, uint maxExt )
static Bool IsImage( ctmbstr iType )
{
uint i;
-
- /* Get the file extension */
tmbchar ext[20];
+
+ if ( !IsFilePath(iType) ) return 0;
+
GetFileExtension( iType, ext, sizeof(ext) );
/* Compare it to the array of known image file extensions */
@@ -190,8 +234,11 @@ static int IsSoundFile( ctmbstr sType )
{
uint i;
tmbchar ext[ 20 ];
- GetFileExtension( sType, ext, sizeof(ext) );
+ if ( !IsFilePath(sType) ) return 0;
+
+ GetFileExtension( sType, ext, sizeof(ext) );
+
for (i = 0; i < N_AUDIO_EXTS; i++)
{
if ( TY_(tmbstrcasecmp)(ext, soundExtensions[i]) == 0 )
@@ -215,6 +262,9 @@ static Bool IsValidSrcExtension( ctmbstr sType )
{
uint i;
tmbchar ext[20];
+
+ if ( !IsFilePath(sType) ) return 0;
+
GetFileExtension( sType, ext, sizeof(ext) );
for (i = 0; i < N_FRAME_EXTS; i++)
@@ -237,6 +287,9 @@ static Bool IsValidMediaExtension( ctmbstr sType )
{
uint i;
tmbchar ext[20];
+
+ if ( !IsFilePath(sType) ) return 0;
+
GetFileExtension( sType, ext, sizeof(ext) );
for (i = 0; i < N_MEDIA_EXTS; i++)
@@ -2708,6 +2761,10 @@ static Bool CheckMetaData( TidyDocImpl* doc, Node* node, Bool HasMetaData )
TY_(ReportAccessError)( doc, node, REMOVE_AUTO_REDIRECT);
}
}
+ if (TY_(IsHTML5Mode)(doc) && attrIsCHARSET(av) && hasValue(av))
+ {
+ ContainsAttr = yes;
+ }
}
if ( HasContent || HasHttpEquiv )
@@ -2787,9 +2844,17 @@ static void CheckDocType( TidyDocImpl* doc )
if (DTnode && DTnode->end != 0)
{
ctmbstr word = textFromOneNode( doc, DTnode);
- if ((strstr (word, "HTML PUBLIC") == NULL) &&
- (strstr (word, "html PUBLIC") == NULL))
- DTnode = NULL;
+ if (TY_(IsHTML5Mode)(doc))
+ {
+ if ((strstr(word, "HTML") == NULL) &&
+ (strstr(word, "html") == NULL))
+ DTnode = NULL;
+ }
+ else {
+ if ((strstr(word, "HTML PUBLIC") == NULL) &&
+ (strstr(word, "html PUBLIC") == NULL))
+ DTnode = NULL;
+ }
}
if (!DTnode)
TY_(ReportAccessError)( doc, &doc->root, DOCTYPE_MISSING);
diff --git a/src/config.c b/src/config.c
index 1ad46d7..0026dc9 100644
--- a/src/config.c
+++ b/src/config.c
@@ -320,6 +320,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyUpperCaseTags, MU, "uppercase-tags", BL, no, ParseBool, boolPicks },
{ TidyUseCustomTags, MU, "custom-tags", IN, TidyCustomNo, ParseUseCustomTags,customTagsPicks }, /* 20170309 - Issue #119 */
{ TidyVertSpace, PP, "vertical-space", IN, no, ParseAutoBool, autoBoolPicks }, /* #228 - tri option */
+ { TidyWarnPropAttrs, MU, "warn-proprietary-attributes", BL, yes, ParseBool, boolPicks },
{ TidyWord2000, MU, "word-2000", BL, no, ParseBool, boolPicks },
{ TidyWrapAsp, PP, "wrap-asp", BL, yes, ParseBool, boolPicks },
{ TidyWrapAttVals, PP, "wrap-attributes", BL, no, ParseBool, boolPicks },
diff --git a/src/gdoc.c b/src/gdoc.c
index 3205536..50cd9bc 100644
--- a/src/gdoc.c
+++ b/src/gdoc.c
@@ -115,6 +115,11 @@ static void CleanNode( TidyDocImpl* doc, Node *node )
else if (nodeIsA(child) && !child->content)
{
AttVal *id = TY_(GetAttrByName)( child, "name" );
+ /* Recent Google Docs is using "id" instead of "name" in
+ ** the exported html.
+ */
+ if (!id)
+ id = TY_(GetAttrByName)( child, "id" );
if (id)
TY_(RepairAttrValue)( doc, child->parent, "id", id->value );
diff --git a/src/language_en.h b/src/language_en.h
index e73f873..364f69b 100644
--- a/src/language_en.h
+++ b/src/language_en.h
@@ -1167,10 +1167,10 @@ static languageDefinition language_en = { whichPluralForm_en, {
be translated. */
TidyStrictTagsAttr, 0,
"This options ensures that tags and attributes are applicable for the "
- "version of HTML that Tidy outputs. When set to yes (the "
- "default) and the output document type is a strict doctype, then Tidy "
- "will report errors. If the output document type is a loose or "
- "transitional doctype, then Tidy will report warnings. "
+ "version of HTML that Tidy outputs. When set to yes and the "
+ "output document type is a strict doctype, then Tidy will report "
+ "errors. If the output document type is a loose or transitional "
+ "doctype, then Tidy will report warnings. "
"<br/>"
"Additionally if <code>drop-proprietary-attributes</code> is enabled, "
"then not applicable attributes will be dropped, too. "
@@ -1261,6 +1261,17 @@ static languageDefinition language_en = { whichPluralForm_en, {
"If set to auto Tidy will eliminate nearly all newline "
"characters."
},
+ {/* Important notes for translators:
+ - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
+ <br/>.
+ - Entities, tags, attributes, etc., should be enclosed in <code></code>.
+ - Option values should be enclosed in <var></var>.
+ - It's very important that <br/> be self-closing!
+ - The strings "Tidy" and "HTML Tidy" are the program name and must not
+ be translated. */
+ TidyWarnPropAttrs, 0,
+ "This option specifies if Tidy should warn on proprietary attributes."
+ },
{/* Important notes for translators:
         - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
           <br/>.
@@ -1600,14 +1611,6 @@ static languageDefinition language_en = { whichPluralForm_en, {
" search stops for the current direction.\n"
" TD cells that set the axis attribute are also treated as header cells.\n"
},
- {/* This console output should be limited to 78 characters per line. */
- TEXT_WINDOWS_CHARS, 0,
- "Characters codes for the Microsoft Windows fonts in the range\n"
- "128 - 159 may not be recognized on other platforms. You are\n"
- "instead recommended to use named entities, e.g. ™ rather\n"
- "than Windows character code 153 (0x2122 in Unicode). Note that\n"
- "as of February 1998 few browsers support the new entities.\n"
- },
{/* This console output should be limited to 78 characters per line.
- %s represents a string-encoding name which may be localized in your language. */
TEXT_VENDOR_CHARS, 0,
diff --git a/src/message.c b/src/message.c
index bd94bc9..2c5587e 100755
--- a/src/message.c
+++ b/src/message.c
@@ -757,13 +757,6 @@ void TY_(ErrorSummary)( TidyDocImpl* doc )
if (doc->badChars)
{
-#if 0
- if ( doc->badChars & WINDOWS_CHARS )
- {
- message = TY_(tidyMessageCreate)( doc, TEXT_WINDOWS_CHARS, TidyDialogueDoc);
- messagePos(message);
- }
-#endif
if (doc->badChars & BC_VENDOR_SPECIFIC_CHARS)
{
message = TY_(tidyMessageCreate)( doc, TEXT_VENDOR_CHARS, TidyDialogueDoc, encnam);
diff --git a/src/tags.c b/src/tags.c
index 50a931f..36a372c 100644
--- a/src/tags.c
+++ b/src/tags.c
@@ -184,7 +184,7 @@ static Dict tag_defs[] =
{ TidyTag_BLOCKQUOTE, "blockquote", VERS_ELEM_BLOCKQUOTE, &TY_(W3CAttrsFor_BLOCKQUOTE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_BODY, "body", VERS_ELEM_BODY, &TY_(W3CAttrsFor_BODY)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseBody), NULL },
{ TidyTag_BR, "br", VERS_ELEM_BR, &TY_(W3CAttrsFor_BR)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
- { TidyTag_BUTTON, "button", VERS_ELEM_BUTTON, &TY_(W3CAttrsFor_BUTTON)[0], (CM_INLINE), TY_(ParseBlock), NULL },
+ { TidyTag_BUTTON, "button", VERS_ELEM_BUTTON, &TY_(W3CAttrsFor_BUTTON)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_CAPTION, "caption", VERS_ELEM_CAPTION, &TY_(W3CAttrsFor_CAPTION)[0], (CM_TABLE), TY_(ParseBlock), CheckCaption },
{ TidyTag_CENTER, "center", VERS_ELEM_CENTER, &TY_(W3CAttrsFor_CENTER)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_CITE, "cite", VERS_ELEM_CITE, &TY_(W3CAttrsFor_CITE)[0], (CM_INLINE), TY_(ParseInline), NULL },
@@ -773,9 +773,6 @@ void TY_(AdjustTags)( TidyDocImpl *doc )
{
np->parser = TY_(ParseInline);
np->model = CM_INLINE;
-#if ELEMENT_HASH_LOOKUP
- tagsEmptyHash( doc, tags );
-#endif
}
/*\
@@ -787,9 +784,6 @@ void TY_(AdjustTags)( TidyDocImpl *doc )
if (np)
{
np->parser = TY_(ParseInline);
-#if ELEMENT_HASH_LOOKUP
- tagsEmptyHash( doc, tags );
-#endif
}
/*\
@@ -801,10 +795,24 @@ void TY_(AdjustTags)( TidyDocImpl *doc )
if (np)
{
np->model |= CM_HEAD; /* add back allowed in head */
-#if ELEMENT_HASH_LOOKUP
- tagsEmptyHash( doc, tags );
-#endif
}
+
+/*\
+ * Issue #461
+ * TidyTag_BUTTON is a block in HTML4,
+ * whereas it is inline in HTML5
+\*/
+ np = (Dict *)TY_(LookupTagDef)(TidyTag_BUTTON);
+ if (np)
+ {
+ np->parser = TY_(ParseBlock);
+ }
+
+#if ELEMENT_HASH_LOOKUP
+ tagsEmptyHash(doc, tags); /* not sure this is really required, but to be sure */
+#endif
+ doc->HTML5Mode = no; /* set *NOT* HTML5 mode */
+
}
Bool TY_(IsHTML5Mode)( TidyDocImpl *doc )
@@ -839,6 +847,16 @@ void TY_(ResetTags)( TidyDocImpl *doc )
{
np->model = (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM); /* reset */
}
+ /*\
+ * Issue #461
+ * TidyTag_BUTTON reset to inline in HTML5
+ \*/
+ np = (Dict *)TY_(LookupTagDef)(TidyTag_BUTTON);
+ if (np)
+ {
+ np->parser = TY_(ParseInline);
+ }
+
#if ELEMENT_HASH_LOOKUP
tagsEmptyHash( doc, tags ); /* not sure this is really required, but to be sure */
#endif
@@ -858,7 +876,6 @@ void TY_(FreeTags)( TidyDocImpl* doc )
/* get rid of dangling tag references */
TidyClearMemory( tags, sizeof(TidyTagImpl) );
- doc->HTML5Mode = no; /* reset html5 mode == legacy html4 mode */
}
diff --git a/src/tidylib.c b/src/tidylib.c
index b429314..62afefe 100755
--- a/src/tidylib.c
+++ b/src/tidylib.c
@@ -1823,7 +1823,10 @@ void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node )
attrIsMismatched = check_versions ? TY_(AttributeIsMismatched)(node, attval, doc) : no;
/* Let the PROPRIETARY_ATTRIBUTE warning have precedence. */
if ( attrIsProprietary )
- TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
+ {
+ if ( cfgBool(doc, TidyWarnPropAttrs) )
+ TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
+ }
else if ( attrIsMismatched )
{
TY_(ReportAttrError)(doc, node, attval, attrReportType);
diff --git a/tidy.pc.cmake.in b/tidy.pc.cmake.in
new file mode 100644
index 0000000..7d819f1
--- /dev/null
+++ b/tidy.pc.cmake.in
@@ -0,0 +1,11 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=${exec_prefix}/@LIB_INSTALL_DIR@
+includedir=${prefix}/@INCLUDE_INSTALL_DIR@
+# NOTE: the two install dirs above are appended to the prefix, so they must be prefix-relative.
+Name: @LIB_NAME@
+Description: @LIBTIDY_DESCRIPTION@
+URL: @LIBTIDY_URL@
+Version: @LIBTIDY_VERSION@
+Libs: -L${libdir} -l@LIB_NAME@
+Cflags: -I${includedir}
diff --git a/version.txt b/version.txt
index 605309b..ed2e779 100644
--- a/version.txt
+++ b/version.txt
@@ -1,2 +1,2 @@
-5.5.13
-2017.03.31
+5.5.18
+2017.05.06