diff --git a/CMakeLists.txt b/CMakeLists.txt index 862ca31..d3a7f0f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,8 @@ cmake_minimum_required (VERSION 2.8.7) set(LIB_NAME tidy) +set(LIBTIDY_DESCRIPTION "${LIB_NAME} - HTML syntax checker") +set(LIBTIDY_URL "http://www.html-tidy.org") project (${LIB_NAME}) @@ -203,7 +205,7 @@ if (NOT BIN_INSTALL_DIR) endif () if (NOT INCLUDE_INSTALL_DIR) - set(INCLUDE_INSTALL_DIR include) + set(INCLUDE_INSTALL_DIR include/${LIB_NAME}) endif () # Always build the STATIC library @@ -363,7 +365,7 @@ if (WIN32) set(CPACK_SOURCE_GENERATOR "ZIP") set(CPACK_WIX_UPGRADE_GUID "D809598A-B513-4752-B268-0BAC403B00E4") elseif ( ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" ) - set(CPACK_GENERATOR "PackageMake") + set(CPACK_GENERATOR "productbuild") set(CPACK_SOURCE_GENERATOR "TGZ") else () set(CPACK_GENERATOR "DEB;RPM") @@ -371,7 +373,7 @@ else () endif () set(CPACK_PACKAGE_NAME "${LIB_NAME}") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${LIB_NAME} - HTML syntax checker") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${LIBTIDY_DESCRIPTION}") set(CPACK_PACKAGE_VENDOR "HTML Tidy Advocacy Community Group") set(CPACK_PACKAGE_CONTACT "maintainer@htacg.org") @@ -387,7 +389,7 @@ set(CPACK_RESOURCE_FILE_WELCOME "${CMAKE_CURRENT_SOURCE_DIR}/README/README.html" ## debian config set(CPACK_DEBIAN_PACKAGE_MAINTAINER ${CPACK_PACKAGE_CONTACT}) -set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "http://www.html-tidy.org/") +set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${LIBTIDY_URL}) #set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc") set(CPACK_DEBIAN_PACKAGE_SECTION "Libraries") set(CPACK_SOURCE_IGNORE_FILES @@ -405,4 +407,15 @@ endif () include(CPack) +# pkg-config +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/${LIB_NAME}.pc.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}.pc" + @ONLY + ) +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}.pc" + DESTINATION "${LIB_INSTALL_DIR}/pkgconfig" + ) + # eof diff --git a/README/BUILD.md b/README/BUILD.md index 6ae5bbb..e427752 100644 --- 
a/README/BUILD.md +++ b/README/BUILD.md @@ -40,6 +40,14 @@ If you do **not** need the tidy library built as a 'shared' (DLL) library, then See the `CMakeLists.txt` file for other CMake **options** offered. +## Build the tidy packages + + 1. `cd build/cmake` + + 2. `cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr` + + 3. Unix/OS X: `make package` + ## Build PHP with the tidy-html5 library Due to API changes in the PHP source, `buffio.h` needs to be renamed to `tidybuffio.h` in the file `ext/tidy/tidy.c` in PHP's source. diff --git a/build/cmake/.gitignore b/build/cmake/.gitignore index 2539213..23b5a7b 100644 --- a/build/cmake/.gitignore +++ b/build/cmake/.gitignore @@ -30,5 +30,6 @@ _CPack_Packages/* tidy libtidy* tidy1.xsl +tidy.pc *.vcproj - +.pkg diff --git a/build/cmake/cmake-clean.txt b/build/cmake/cmake-clean.txt index 36eb531..c57f3ae 100644 --- a/build/cmake/cmake-clean.txt +++ b/build/cmake/cmake-clean.txt @@ -27,4 +27,4 @@ libtidy.so.5.0.0 tidy.1 _CPack_Packages tidy1.xsl - +tidy.pc diff --git a/include/tidyenum.h b/include/tidyenum.h index 567f1ba..565eee9 100644 --- a/include/tidyenum.h +++ b/include/tidyenum.h @@ -630,6 +630,7 @@ typedef enum TidyUpperCaseTags, /**< Output tags in upper not lower case */ TidyUseCustomTags, /**< Enable Tidy to use autonomous custom tags */ TidyVertSpace, /**< degree to which markup is spread out vertically */ + TidyWarnPropAttrs, /**< Warns on proprietary attributes */ TidyWord2000, /**< Draconian cleaning for Word2000 */ TidyWrapAsp, /**< Wrap within ASP pseudo elements */ TidyWrapAttVals, /**< Wrap within attribute values */ diff --git a/src/access.c b/src/access.c index f882e8c..ac3b9aa 100644 --- a/src/access.c +++ b/src/access.c @@ -131,6 +131,49 @@ static void MetaDataPresent( TidyDocImpl* doc, Node* node ); static void CheckEmbed( TidyDocImpl* doc, Node* node ); static void CheckListUsage( TidyDocImpl* doc, Node* node ); +/* + IsFilePath attempts to determine whether or not the URI indicated + 
by path is a file rather than a TLD. For example, sample.com.au might + be confused with an audio file. +*/ +static Bool IsFilePath( ctmbstr path ) +{ + const char *p = path; + char c; + typedef enum states { initial, protocol_found, slash_found, file_found } states; + states state = initial; + + while ( ( c = *p++ ) != 0 && state != file_found ) + { + switch ( state ) + { + case initial: + if ( c == ':' ) + state = protocol_found; + break; + + case protocol_found: + if ( c =='/' ) + state = slash_found; + break; + + case slash_found: + if ( c =='/' ) + state = protocol_found; + else + state = file_found; + break; + + default: + break; + } + + } + + return state == file_found || state == initial; +} + + /* GetFileExtension takes a path and returns the extension portion of the path (if any). @@ -163,9 +206,10 @@ static void GetFileExtension( ctmbstr path, tmbchar *ext, uint maxExt ) static Bool IsImage( ctmbstr iType ) { uint i; - - /* Get the file extension */ tmbchar ext[20]; + + if ( !IsFilePath(iType) ) return 0; + GetFileExtension( iType, ext, sizeof(ext) ); /* Compare it to the array of known image file extensions */ @@ -190,8 +234,11 @@ static int IsSoundFile( ctmbstr sType ) { uint i; tmbchar ext[ 20 ]; - GetFileExtension( sType, ext, sizeof(ext) ); + if ( !IsFilePath(sType) ) return 0; + + GetFileExtension( sType, ext, sizeof(ext) ); + for (i = 0; i < N_AUDIO_EXTS; i++) { if ( TY_(tmbstrcasecmp)(ext, soundExtensions[i]) == 0 ) @@ -215,6 +262,9 @@ static Bool IsValidSrcExtension( ctmbstr sType ) { uint i; tmbchar ext[20]; + + if ( !IsFilePath(sType) ) return 0; + GetFileExtension( sType, ext, sizeof(ext) ); for (i = 0; i < N_FRAME_EXTS; i++) @@ -237,6 +287,9 @@ static Bool IsValidMediaExtension( ctmbstr sType ) { uint i; tmbchar ext[20]; + + if ( !IsFilePath(sType) ) return 0; + GetFileExtension( sType, ext, sizeof(ext) ); for (i = 0; i < N_MEDIA_EXTS; i++) @@ -2708,6 +2761,10 @@ static Bool CheckMetaData( TidyDocImpl* doc, Node* node, Bool HasMetaData ) 
TY_(ReportAccessError)( doc, node, REMOVE_AUTO_REDIRECT); } } + if (TY_(IsHTML5Mode)(doc) && attrIsCHARSET(av) && hasValue(av)) + { + ContainsAttr = yes; + } } if ( HasContent || HasHttpEquiv ) @@ -2787,9 +2844,17 @@ static void CheckDocType( TidyDocImpl* doc ) if (DTnode && DTnode->end != 0) { ctmbstr word = textFromOneNode( doc, DTnode); - if ((strstr (word, "HTML PUBLIC") == NULL) && - (strstr (word, "html PUBLIC") == NULL)) - DTnode = NULL; + if (TY_(IsHTML5Mode)(doc)) + { + if ((strstr(word, "HTML") == NULL) && + (strstr(word, "html") == NULL)) + DTnode = NULL; + } + else { + if ((strstr(word, "HTML PUBLIC") == NULL) && + (strstr(word, "html PUBLIC") == NULL)) + DTnode = NULL; + } } if (!DTnode) TY_(ReportAccessError)( doc, &doc->root, DOCTYPE_MISSING); diff --git a/src/config.c b/src/config.c index 1ad46d7..0026dc9 100644 --- a/src/config.c +++ b/src/config.c @@ -320,6 +320,7 @@ static const TidyOptionImpl option_defs[] = { TidyUpperCaseTags, MU, "uppercase-tags", BL, no, ParseBool, boolPicks }, { TidyUseCustomTags, MU, "custom-tags", IN, TidyCustomNo, ParseUseCustomTags,customTagsPicks }, /* 20170309 - Issue #119 */ { TidyVertSpace, PP, "vertical-space", IN, no, ParseAutoBool, autoBoolPicks }, /* #228 - tri option */ + { TidyWarnPropAttrs, MU, "warn-proprietary-attributes", BL, yes, ParseBool, boolPicks }, { TidyWord2000, MU, "word-2000", BL, no, ParseBool, boolPicks }, { TidyWrapAsp, PP, "wrap-asp", BL, yes, ParseBool, boolPicks }, { TidyWrapAttVals, PP, "wrap-attributes", BL, no, ParseBool, boolPicks }, diff --git a/src/gdoc.c b/src/gdoc.c index 3205536..50cd9bc 100644 --- a/src/gdoc.c +++ b/src/gdoc.c @@ -115,6 +115,11 @@ static void CleanNode( TidyDocImpl* doc, Node *node ) else if (nodeIsA(child) && !child->content) { AttVal *id = TY_(GetAttrByName)( child, "name" ); + /* Recent Google Docs is using "id" instead of "name" in + ** the exported html. 
+ */ + if (!id) + id = TY_(GetAttrByName)( child, "id" ); if (id) TY_(RepairAttrValue)( doc, child->parent, "id", id->value ); diff --git a/src/language_en.h b/src/language_en.h index e73f873..364f69b 100644 --- a/src/language_en.h +++ b/src/language_en.h @@ -1167,10 +1167,10 @@ static languageDefinition language_en = { whichPluralForm_en, { be translated. */ TidyStrictTagsAttr, 0, "This options ensures that tags and attributes are applicable for the " - "version of HTML that Tidy outputs. When set to yes (the " - "default) and the output document type is a strict doctype, then Tidy " - "will report errors. If the output document type is a loose or " - "transitional doctype, then Tidy will report warnings. " + "version of HTML that Tidy outputs. When set to yes and the " + "output document type is a strict doctype, then Tidy will report " + "errors. If the output document type is a loose or transitional " + "doctype, then Tidy will report warnings. " "
" "Additionally if drop-proprietary-attributes is enabled, " "then not applicable attributes will be dropped, too. " @@ -1261,6 +1261,17 @@ static languageDefinition language_en = { whichPluralForm_en, { "If set to auto Tidy will eliminate nearly all newline " "characters." }, + {/* Important notes for translators: + - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and +
<br/>. + - Entities, tags, attributes, etc., should be enclosed in <code></code>. + - Option values should be enclosed in <var></var>. + - It's very important that
<br/> be self-closing! + - The strings "Tidy" and "HTML Tidy" are the program name and must not + be translated. */ + TidyWarnPropAttrs, 0, + "This option specifies if Tidy should warn on proprietary attributes." + }, {/* Important notes for translators: - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and <br/>
. @@ -1600,14 +1611,6 @@ static languageDefinition language_en = { whichPluralForm_en, { " search stops for the current direction.\n" " TD cells that set the axis attribute are also treated as header cells.\n" }, - {/* This console output should be limited to 78 characters per line. */ - TEXT_WINDOWS_CHARS, 0, - "Characters codes for the Microsoft Windows fonts in the range\n" - "128 - 159 may not be recognized on other platforms. You are\n" - "instead recommended to use named entities, e.g. ™ rather\n" - "than Windows character code 153 (0x2122 in Unicode). Note that\n" - "as of February 1998 few browsers support the new entities.\n" - }, {/* This console output should be limited to 78 characters per line. - %s represents a string-encoding name which may be localized in your language. */ TEXT_VENDOR_CHARS, 0, diff --git a/src/message.c b/src/message.c index bd94bc9..2c5587e 100755 --- a/src/message.c +++ b/src/message.c @@ -757,13 +757,6 @@ void TY_(ErrorSummary)( TidyDocImpl* doc ) if (doc->badChars) { -#if 0 - if ( doc->badChars & WINDOWS_CHARS ) - { - message = TY_(tidyMessageCreate)( doc, TEXT_WINDOWS_CHARS, TidyDialogueDoc); - messagePos(message); - } -#endif if (doc->badChars & BC_VENDOR_SPECIFIC_CHARS) { message = TY_(tidyMessageCreate)( doc, TEXT_VENDOR_CHARS, TidyDialogueDoc, encnam); diff --git a/src/tags.c b/src/tags.c index 50a931f..36a372c 100644 --- a/src/tags.c +++ b/src/tags.c @@ -184,7 +184,7 @@ static Dict tag_defs[] = { TidyTag_BLOCKQUOTE, "blockquote", VERS_ELEM_BLOCKQUOTE, &TY_(W3CAttrsFor_BLOCKQUOTE)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_BODY, "body", VERS_ELEM_BODY, &TY_(W3CAttrsFor_BODY)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseBody), NULL }, { TidyTag_BR, "br", VERS_ELEM_BR, &TY_(W3CAttrsFor_BR)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, - { TidyTag_BUTTON, "button", VERS_ELEM_BUTTON, &TY_(W3CAttrsFor_BUTTON)[0], (CM_INLINE), TY_(ParseBlock), NULL }, + { TidyTag_BUTTON, "button", VERS_ELEM_BUTTON, 
&TY_(W3CAttrsFor_BUTTON)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_CAPTION, "caption", VERS_ELEM_CAPTION, &TY_(W3CAttrsFor_CAPTION)[0], (CM_TABLE), TY_(ParseBlock), CheckCaption }, { TidyTag_CENTER, "center", VERS_ELEM_CENTER, &TY_(W3CAttrsFor_CENTER)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_CITE, "cite", VERS_ELEM_CITE, &TY_(W3CAttrsFor_CITE)[0], (CM_INLINE), TY_(ParseInline), NULL }, @@ -773,9 +773,6 @@ void TY_(AdjustTags)( TidyDocImpl *doc ) { np->parser = TY_(ParseInline); np->model = CM_INLINE; -#if ELEMENT_HASH_LOOKUP - tagsEmptyHash( doc, tags ); -#endif } /*\ @@ -787,9 +784,6 @@ void TY_(AdjustTags)( TidyDocImpl *doc ) if (np) { np->parser = TY_(ParseInline); -#if ELEMENT_HASH_LOOKUP - tagsEmptyHash( doc, tags ); -#endif } /*\ @@ -801,10 +795,24 @@ void TY_(AdjustTags)( TidyDocImpl *doc ) if (np) { np->model |= CM_HEAD; /* add back allowed in head */ -#if ELEMENT_HASH_LOOKUP - tagsEmptyHash( doc, tags ); -#endif } + +/*\ + * Issue #461 + * TidyTag_BUTTON is a block in HTML4, + * whereas it is inline in HTML5 +\*/ + np = (Dict *)TY_(LookupTagDef)(TidyTag_BUTTON); + if (np) + { + np->parser = TY_(ParseBlock); + } + +#if ELEMENT_HASH_LOOKUP + tagsEmptyHash(doc, tags); /* not sure this is really required, but to be sure */ +#endif + doc->HTML5Mode = no; /* set *NOT* HTML5 mode */ + } Bool TY_(IsHTML5Mode)( TidyDocImpl *doc ) @@ -839,6 +847,16 @@ void TY_(ResetTags)( TidyDocImpl *doc ) { np->model = (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM); /* reset */ } + /*\ + * Issue #461 + * TidyTag_BUTTON reset to inline in HTML5 + \*/ + np = (Dict *)TY_(LookupTagDef)(TidyTag_BUTTON); + if (np) + { + np->parser = TY_(ParseInline); + } + #if ELEMENT_HASH_LOOKUP tagsEmptyHash( doc, tags ); /* not sure this is really required, but to be sure */ #endif @@ -858,7 +876,6 @@ void TY_(FreeTags)( TidyDocImpl* doc ) /* get rid of dangling tag references */ TidyClearMemory( tags, sizeof(TidyTagImpl) ); - doc->HTML5Mode = no; /* reset html5 mode == legacy html4 mode 
*/ } diff --git a/src/tidylib.c b/src/tidylib.c index b429314..62afefe 100755 --- a/src/tidylib.c +++ b/src/tidylib.c @@ -1823,7 +1823,10 @@ void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node ) attrIsMismatched = check_versions ? TY_(AttributeIsMismatched)(node, attval, doc) : no; /* Let the PROPRIETARY_ATTRIBUTE warning have precedence. */ if ( attrIsProprietary ) - TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE); + { + if ( cfgBool(doc, TidyWarnPropAttrs) ) + TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE); + } else if ( attrIsMismatched ) { TY_(ReportAttrError)(doc, node, attval, attrReportType); diff --git a/tidy.pc.cmake.in b/tidy.pc.cmake.in new file mode 100644 index 0000000..7d819f1 --- /dev/null +++ b/tidy.pc.cmake.in @@ -0,0 +1,11 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +libdir=${exec_prefix}/@LIB_INSTALL_DIR@ +includedir=${prefix}/@INCLUDE_INSTALL_DIR@ + +Name: @LIB_NAME@ +Description: @LIBTIDY_DESCRIPTION@ +URL: @LIBTIDY_URL@ +Version: @LIBTIDY_VERSION@ +Libs: -L${libdir} -l@LIB_NAME@ +Cflags: -I${includedir} diff --git a/version.txt b/version.txt index 605309b..ed2e779 100644 --- a/version.txt +++ b/version.txt @@ -1,2 +1,2 @@ -5.5.13 -2017.03.31 +5.5.18 +2017.05.06