From 8b2f92f625ed7c0b5d0de6fdf60e1248b7953626 Mon Sep 17 00:00:00 2001 From: Jim Derry Date: Wed, 3 May 2017 16:15:44 -0400 Subject: [PATCH 1/2] Issue #338 occurs because the existing routines assume that any URI with an extension is a file, and so links to TLDs ending with .pl, .au, etc., will cause accessibility warnings. This fix attempts to distinguish between URIs that are likely to be files versus links to domains. --- src/access.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/src/access.c b/src/access.c index f882e8c..7244492 100644 --- a/src/access.c +++ b/src/access.c @@ -131,6 +131,49 @@ static void MetaDataPresent( TidyDocImpl* doc, Node* node ); static void CheckEmbed( TidyDocImpl* doc, Node* node ); static void CheckListUsage( TidyDocImpl* doc, Node* node ); +/* + IsFilePath attempts to determine whether or not the URI indicated + by path is a file rather than a TLD. For example, sample.com.au might + be confused with an audio file. +*/ +static Bool IsFilePath( ctmbstr path ) +{ + const char *p = path; + char c; + typedef enum states { initial, protocol_found, slash_found, file_found } states; + states state = initial; + + while ( ( c = *p++ ) != 0 && state != file_found ) + { + switch ( state ) + { + case initial: + if ( c == ':' ) + state = protocol_found; + break; + + case protocol_found: + if ( c =='/' ) + state = slash_found; + break; + + case slash_found: + if ( c =='/' ) + state = protocol_found; + else + state = file_found; + break; + + default: + break; + } + + } + + return state == file_found || state == initial; +} + + /* GetFileExtension takes a path and returns the extension portion of the path (if any). 
@@ -163,9 +206,10 @@ static void GetFileExtension( ctmbstr path, tmbchar *ext, uint maxExt ) static Bool IsImage( ctmbstr iType ) { uint i; - - /* Get the file extension */ tmbchar ext[20]; + + if ( !IsFilePath(iType) ) return 0; + GetFileExtension( iType, ext, sizeof(ext) ); /* Compare it to the array of known image file extensions */ @@ -190,8 +234,11 @@ static int IsSoundFile( ctmbstr sType ) { uint i; tmbchar ext[ 20 ]; - GetFileExtension( sType, ext, sizeof(ext) ); + if ( !IsFilePath(sType) ) return 0; + + GetFileExtension( sType, ext, sizeof(ext) ); + for (i = 0; i < N_AUDIO_EXTS; i++) { if ( TY_(tmbstrcasecmp)(ext, soundExtensions[i]) == 0 ) @@ -215,6 +262,9 @@ static Bool IsValidSrcExtension( ctmbstr sType ) { uint i; tmbchar ext[20]; + + if ( !IsFilePath(sType) ) return 0; + GetFileExtension( sType, ext, sizeof(ext) ); for (i = 0; i < N_FRAME_EXTS; i++) @@ -237,6 +287,9 @@ static Bool IsValidMediaExtension( ctmbstr sType ) { uint i; tmbchar ext[20]; + + if ( !IsFilePath(sType) ) return 0; + GetFileExtension( sType, ext, sizeof(ext) ); for (i = 0; i < N_MEDIA_EXTS; i++) From d142527a8ebc23f800ad9c6df976ca49b447e31e Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Thu, 4 May 2017 17:36:39 +0200 Subject: [PATCH 2/2] Issue #338 - Deal with two other spurious access warnings --- src/access.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/access.c b/src/access.c index 7244492..ac3b9aa 100644 --- a/src/access.c +++ b/src/access.c @@ -2761,6 +2761,10 @@ static Bool CheckMetaData( TidyDocImpl* doc, Node* node, Bool HasMetaData ) TY_(ReportAccessError)( doc, node, REMOVE_AUTO_REDIRECT); } } + if (TY_(IsHTML5Mode)(doc) && attrIsCHARSET(av) && hasValue(av)) + { + ContainsAttr = yes; + } } if ( HasContent || HasHttpEquiv ) @@ -2840,9 +2844,17 @@ static void CheckDocType( TidyDocImpl* doc ) if (DTnode && DTnode->end != 0) { ctmbstr word = textFromOneNode( doc, DTnode); - if ((strstr (word, "HTML PUBLIC") == NULL) && - (strstr 
(word, "html PUBLIC") == NULL)) - DTnode = NULL; + if (TY_(IsHTML5Mode)(doc)) + { + if ((strstr(word, "HTML") == NULL) && + (strstr(word, "html") == NULL)) + DTnode = NULL; + } + else { + if ((strstr(word, "HTML PUBLIC") == NULL) && + (strstr(word, "html PUBLIC") == NULL)) + DTnode = NULL; + } } if (!DTnode) TY_(ReportAccessError)( doc, &doc->root, DOCTYPE_MISSING);