Merge branch 'next' into deprecations

This commit is contained in:
Jim Derry 2017-05-06 14:34:48 -04:00
commit 09d1802298
14 changed files with 166 additions and 45 deletions

View File

@ -3,6 +3,8 @@
cmake_minimum_required (VERSION 2.8.7)
set(LIB_NAME tidy)
set(LIBTIDY_DESCRIPTION "${LIB_NAME} - HTML syntax checker")
set(LIBTIDY_URL "http://www.html-tidy.org")
project (${LIB_NAME})
@ -203,7 +205,7 @@ if (NOT BIN_INSTALL_DIR)
endif ()
if (NOT INCLUDE_INSTALL_DIR)
set(INCLUDE_INSTALL_DIR include)
set(INCLUDE_INSTALL_DIR include/${LIB_NAME})
endif ()
# Always build the STATIC library
@ -363,7 +365,7 @@ if (WIN32)
set(CPACK_SOURCE_GENERATOR "ZIP")
set(CPACK_WIX_UPGRADE_GUID "D809598A-B513-4752-B268-0BAC403B00E4")
elseif ( ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" )
set(CPACK_GENERATOR "PackageMake")
set(CPACK_GENERATOR "productbuild")
set(CPACK_SOURCE_GENERATOR "TGZ")
else ()
set(CPACK_GENERATOR "DEB;RPM")
@ -371,7 +373,7 @@ else ()
endif ()
set(CPACK_PACKAGE_NAME "${LIB_NAME}")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${LIB_NAME} - HTML syntax checker")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${LIBTIDY_DESCRIPTION}")
set(CPACK_PACKAGE_VENDOR "HTML Tidy Advocacy Community Group")
set(CPACK_PACKAGE_CONTACT "maintainer@htacg.org")
@ -387,7 +389,7 @@ set(CPACK_RESOURCE_FILE_WELCOME "${CMAKE_CURRENT_SOURCE_DIR}/README/README.html"
## debian config
set(CPACK_DEBIAN_PACKAGE_MAINTAINER ${CPACK_PACKAGE_CONTACT})
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "http://www.html-tidy.org/")
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${LIBTIDY_URL})
#set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc")
set(CPACK_DEBIAN_PACKAGE_SECTION "Libraries")
set(CPACK_SOURCE_IGNORE_FILES
@ -405,4 +407,15 @@ endif ()
include(CPack)
# pkg-config
# Generate ${LIB_NAME}.pc in the build tree from the template kept in the
# source tree. @ONLY limits substitution to @VAR@ references, so ${...}
# text inside the template is passed through unchanged.
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/${LIB_NAME}.pc.cmake.in"
"${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}.pc"
@ONLY
)
# Install the generated .pc file into the pkgconfig directory beneath
# LIB_INSTALL_DIR.
install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}.pc"
DESTINATION "${LIB_INSTALL_DIR}/pkgconfig"
)
# eof

View File

@ -40,6 +40,14 @@ If you do **not** need the tidy library built as a 'shared' (DLL) library, then
See the `CMakeLists.txt` file for other CMake **options** offered.
## Build the tidy packages
1. `cd build/cmake`
2. `cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr`
3. Unix/OS X: `make package`
## Build PHP with the tidy-html5 library
Due to API changes in the PHP source, `buffio.h` needs to be renamed to `tidybuffio.h` in the file `ext/tidy/tidy.c` in PHP's source.

View File

@ -30,5 +30,6 @@ _CPack_Packages/*
tidy
libtidy*
tidy1.xsl
tidy.pc
*.vcproj
.pkg

View File

@ -27,4 +27,4 @@ libtidy.so.5.0.0
tidy.1
_CPack_Packages
tidy1.xsl
tidy.pc

View File

@ -630,6 +630,7 @@ typedef enum
TidyUpperCaseTags, /**< Output tags in upper not lower case */
TidyUseCustomTags, /**< Enable Tidy to use autonomous custom tags */
TidyVertSpace, /**< degree to which markup is spread out vertically */
TidyWarnPropAttrs, /**< Warns on proprietary attributes */
TidyWord2000, /**< Draconian cleaning for Word2000 */
TidyWrapAsp, /**< Wrap within ASP pseudo elements */
TidyWrapAttVals, /**< Wrap within attribute values */

View File

@ -131,6 +131,49 @@ static void MetaDataPresent( TidyDocImpl* doc, Node* node );
static void CheckEmbed( TidyDocImpl* doc, Node* node );
static void CheckListUsage( TidyDocImpl* doc, Node* node );
/*
  IsFilePath reports whether the URI given in path appears to name a
  file rather than a bare host, so that a TLD such as the ".au" of
  sample.com.au is not mistaken for an audio file extension.

  The URI is walked one character at a time by a small state machine:
    - until a ':' is seen, the text is treated as a plain path;
    - once a ':' has been seen, the scan waits for a '/';
    - directly after a '/', another '/' sends the scan back to waiting
      for a '/', while any other character marks the start of a file
      part and ends the scan.

  The result is true when a file part was found, or when path holds
  no ':' at all; otherwise it is false.
*/
static Bool IsFilePath( ctmbstr path )
{
    typedef enum { scanNoColon, scanAfterColon, scanAfterSlash, scanInFile } scanState;
    scanState cur = scanNoColon;
    const char *cp;

    for ( cp = path; *cp != 0 && cur != scanInFile; ++cp )
    {
        const char ch = *cp;

        if ( cur == scanNoColon )
        {
            if ( ch == ':' )
                cur = scanAfterColon;
        }
        else if ( cur == scanAfterColon )
        {
            if ( ch == '/' )
                cur = scanAfterSlash;
        }
        else
        {
            /* Only scanAfterSlash can reach here: scanInFile ends the loop. */
            cur = ( ch == '/' ) ? scanAfterColon : scanInFile;
        }
    }

    /* Stopping while still waiting on the authority part means "not a file". */
    return !( cur == scanAfterColon || cur == scanAfterSlash );
}
/*
GetFileExtension takes a path and returns the extension
portion of the path (if any).
@ -163,9 +206,10 @@ static void GetFileExtension( ctmbstr path, tmbchar *ext, uint maxExt )
static Bool IsImage( ctmbstr iType )
{
uint i;
/* Get the file extension */
tmbchar ext[20];
if ( !IsFilePath(iType) ) return 0;
GetFileExtension( iType, ext, sizeof(ext) );
/* Compare it to the array of known image file extensions */
@ -190,8 +234,11 @@ static int IsSoundFile( ctmbstr sType )
{
uint i;
tmbchar ext[ 20 ];
GetFileExtension( sType, ext, sizeof(ext) );
if ( !IsFilePath(sType) ) return 0;
GetFileExtension( sType, ext, sizeof(ext) );
for (i = 0; i < N_AUDIO_EXTS; i++)
{
if ( TY_(tmbstrcasecmp)(ext, soundExtensions[i]) == 0 )
@ -215,6 +262,9 @@ static Bool IsValidSrcExtension( ctmbstr sType )
{
uint i;
tmbchar ext[20];
if ( !IsFilePath(sType) ) return 0;
GetFileExtension( sType, ext, sizeof(ext) );
for (i = 0; i < N_FRAME_EXTS; i++)
@ -237,6 +287,9 @@ static Bool IsValidMediaExtension( ctmbstr sType )
{
uint i;
tmbchar ext[20];
if ( !IsFilePath(sType) ) return 0;
GetFileExtension( sType, ext, sizeof(ext) );
for (i = 0; i < N_MEDIA_EXTS; i++)
@ -2708,6 +2761,10 @@ static Bool CheckMetaData( TidyDocImpl* doc, Node* node, Bool HasMetaData )
TY_(ReportAccessError)( doc, node, REMOVE_AUTO_REDIRECT);
}
}
if (TY_(IsHTML5Mode)(doc) && attrIsCHARSET(av) && hasValue(av))
{
ContainsAttr = yes;
}
}
if ( HasContent || HasHttpEquiv )
@ -2787,9 +2844,17 @@ static void CheckDocType( TidyDocImpl* doc )
if (DTnode && DTnode->end != 0)
{
ctmbstr word = textFromOneNode( doc, DTnode);
if ((strstr (word, "HTML PUBLIC") == NULL) &&
(strstr (word, "html PUBLIC") == NULL))
DTnode = NULL;
if (TY_(IsHTML5Mode)(doc))
{
if ((strstr(word, "HTML") == NULL) &&
(strstr(word, "html") == NULL))
DTnode = NULL;
}
else {
if ((strstr(word, "HTML PUBLIC") == NULL) &&
(strstr(word, "html PUBLIC") == NULL))
DTnode = NULL;
}
}
if (!DTnode)
TY_(ReportAccessError)( doc, &doc->root, DOCTYPE_MISSING);

View File

@ -320,6 +320,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyUpperCaseTags, MU, "uppercase-tags", BL, no, ParseBool, boolPicks },
{ TidyUseCustomTags, MU, "custom-tags", IN, TidyCustomNo, ParseUseCustomTags,customTagsPicks }, /* 20170309 - Issue #119 */
{ TidyVertSpace, PP, "vertical-space", IN, no, ParseAutoBool, autoBoolPicks }, /* #228 - tri option */
{ TidyWarnPropAttrs, MU, "warn-proprietary-attributes", BL, yes, ParseBool, boolPicks },
{ TidyWord2000, MU, "word-2000", BL, no, ParseBool, boolPicks },
{ TidyWrapAsp, PP, "wrap-asp", BL, yes, ParseBool, boolPicks },
{ TidyWrapAttVals, PP, "wrap-attributes", BL, no, ParseBool, boolPicks },

View File

@ -115,6 +115,11 @@ static void CleanNode( TidyDocImpl* doc, Node *node )
else if (nodeIsA(child) && !child->content)
{
AttVal *id = TY_(GetAttrByName)( child, "name" );
/* Recent versions of Google Docs use "id" instead of "name" in
** the exported HTML.
*/
if (!id)
id = TY_(GetAttrByName)( child, "id" );
if (id)
TY_(RepairAttrValue)( doc, child->parent, "id", id->value );

View File

@ -1167,10 +1167,10 @@ static languageDefinition language_en = { whichPluralForm_en, {
be translated. */
TidyStrictTagsAttr, 0,
"This options ensures that tags and attributes are applicable for the "
"version of HTML that Tidy outputs. When set to <var>yes</var> (the "
"default) and the output document type is a strict doctype, then Tidy "
"will report errors. If the output document type is a loose or "
"transitional doctype, then Tidy will report warnings. "
"version of HTML that Tidy outputs. When set to <var>yes</var> and the "
"output document type is a strict doctype, then Tidy will report "
"errors. If the output document type is a loose or transitional "
"doctype, then Tidy will report warnings. "
"<br/>"
"Additionally if <code>drop-proprietary-attributes</code> is enabled, "
"then not applicable attributes will be dropped, too. "
@ -1261,6 +1261,17 @@ static languageDefinition language_en = { whichPluralForm_en, {
"If set to <var>auto</var> Tidy will eliminate nearly all newline "
"characters."
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
<br/>.
- Entities, tags, attributes, etc., should be enclosed in <code></code>.
- Option values should be enclosed in <var></var>.
- It's very important that <br/> be self-closing!
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
TidyWarnPropAttrs, 0,
"This option specifies if Tidy should warn on proprietary attributes."
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
<br/>.
@ -1600,14 +1611,6 @@ static languageDefinition language_en = { whichPluralForm_en, {
" search stops for the current direction.\n"
" TD cells that set the axis attribute are also treated as header cells.\n"
},
{/* This console output should be limited to 78 characters per line. */
TEXT_WINDOWS_CHARS, 0,
"Characters codes for the Microsoft Windows fonts in the range\n"
"128 - 159 may not be recognized on other platforms. You are\n"
"instead recommended to use named entities, e.g. &trade; rather\n"
"than Windows character code 153 (0x2122 in Unicode). Note that\n"
"as of February 1998 few browsers support the new entities.\n"
},
{/* This console output should be limited to 78 characters per line.
- %s represents a string-encoding name which may be localized in your language. */
TEXT_VENDOR_CHARS, 0,

View File

@ -757,13 +757,6 @@ void TY_(ErrorSummary)( TidyDocImpl* doc )
if (doc->badChars)
{
#if 0
if ( doc->badChars & WINDOWS_CHARS )
{
message = TY_(tidyMessageCreate)( doc, TEXT_WINDOWS_CHARS, TidyDialogueDoc);
messagePos(message);
}
#endif
if (doc->badChars & BC_VENDOR_SPECIFIC_CHARS)
{
message = TY_(tidyMessageCreate)( doc, TEXT_VENDOR_CHARS, TidyDialogueDoc, encnam);

View File

@ -184,7 +184,7 @@ static Dict tag_defs[] =
{ TidyTag_BLOCKQUOTE, "blockquote", VERS_ELEM_BLOCKQUOTE, &TY_(W3CAttrsFor_BLOCKQUOTE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_BODY, "body", VERS_ELEM_BODY, &TY_(W3CAttrsFor_BODY)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseBody), NULL },
{ TidyTag_BR, "br", VERS_ELEM_BR, &TY_(W3CAttrsFor_BR)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_BUTTON, "button", VERS_ELEM_BUTTON, &TY_(W3CAttrsFor_BUTTON)[0], (CM_INLINE), TY_(ParseBlock), NULL },
{ TidyTag_BUTTON, "button", VERS_ELEM_BUTTON, &TY_(W3CAttrsFor_BUTTON)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_CAPTION, "caption", VERS_ELEM_CAPTION, &TY_(W3CAttrsFor_CAPTION)[0], (CM_TABLE), TY_(ParseBlock), CheckCaption },
{ TidyTag_CENTER, "center", VERS_ELEM_CENTER, &TY_(W3CAttrsFor_CENTER)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_CITE, "cite", VERS_ELEM_CITE, &TY_(W3CAttrsFor_CITE)[0], (CM_INLINE), TY_(ParseInline), NULL },
@ -773,9 +773,6 @@ void TY_(AdjustTags)( TidyDocImpl *doc )
{
np->parser = TY_(ParseInline);
np->model = CM_INLINE;
#if ELEMENT_HASH_LOOKUP
tagsEmptyHash( doc, tags );
#endif
}
/*\
@ -787,9 +784,6 @@ void TY_(AdjustTags)( TidyDocImpl *doc )
if (np)
{
np->parser = TY_(ParseInline);
#if ELEMENT_HASH_LOOKUP
tagsEmptyHash( doc, tags );
#endif
}
/*\
@ -801,10 +795,24 @@ void TY_(AdjustTags)( TidyDocImpl *doc )
if (np)
{
np->model |= CM_HEAD; /* add back allowed in head */
#if ELEMENT_HASH_LOOKUP
tagsEmptyHash( doc, tags );
#endif
}
/*\
* Issue #461
* TidyTag_BUTTON is a block in HTML4,
* whereas it is inline in HTML5
\*/
np = (Dict *)TY_(LookupTagDef)(TidyTag_BUTTON);
if (np)
{
np->parser = TY_(ParseBlock);
}
#if ELEMENT_HASH_LOOKUP
tagsEmptyHash(doc, tags); /* not sure this is really required, but to be sure */
#endif
doc->HTML5Mode = no; /* set *NOT* HTML5 mode */
}
Bool TY_(IsHTML5Mode)( TidyDocImpl *doc )
@ -839,6 +847,16 @@ void TY_(ResetTags)( TidyDocImpl *doc )
{
np->model = (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM); /* reset */
}
/*\
* Issue #461
* TidyTag_BUTTON reset to inline in HTML5
\*/
np = (Dict *)TY_(LookupTagDef)(TidyTag_BUTTON);
if (np)
{
np->parser = TY_(ParseInline);
}
#if ELEMENT_HASH_LOOKUP
tagsEmptyHash( doc, tags ); /* not sure this is really required, but to be sure */
#endif
@ -858,7 +876,6 @@ void TY_(FreeTags)( TidyDocImpl* doc )
/* get rid of dangling tag references */
TidyClearMemory( tags, sizeof(TidyTagImpl) );
doc->HTML5Mode = no; /* reset html5 mode == legacy html4 mode */
}

View File

@ -1823,7 +1823,10 @@ void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node )
attrIsMismatched = check_versions ? TY_(AttributeIsMismatched)(node, attval, doc) : no;
/* Let the PROPRIETARY_ATTRIBUTE warning have precedence. */
if ( attrIsProprietary )
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
{
if ( cfgBool(doc, TidyWarnPropAttrs) )
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
}
else if ( attrIsMismatched )
{
TY_(ReportAttrError)(doc, node, attval, attrReportType);

11
tidy.pc.cmake.in Normal file
View File

@ -0,0 +1,11 @@
# pkg-config template for the tidy library. The at-sign-delimited
# placeholders below are filled in by CMake when the build is configured.
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
# Library and header directories are appended to the prefix, so the
# substituted install directories are presumably relative paths - confirm
# against the CMake configuration.
libdir=${exec_prefix}/@LIB_INSTALL_DIR@
includedir=${prefix}/@INCLUDE_INSTALL_DIR@
# Package metadata.
Name: @LIB_NAME@
Description: @LIBTIDY_DESCRIPTION@
URL: @LIBTIDY_URL@
Version: @LIBTIDY_VERSION@
# Linker and compiler flags for consumers of the library.
Libs: -L${libdir} -l@LIB_NAME@
Cflags: -I${includedir}

View File

@ -1,2 +1,2 @@
5.5.13
2017.03.31
5.5.18
2017.05.06