option to avoid incrementing nested containers.
This is in the GetCDATA function. If the container is script or style and this option is on, avoid bumping nested. This addresses issues #65 (1642186) and #280. All attempts at parsing script data are now abandoned as a bad direction.
This commit is contained in:
parent
b4efe7464a
commit
b63c1090c2
121
src/lexer.c
121
src/lexer.c
|
@ -1972,111 +1972,6 @@ static Bool ExpectsContent(Node *node)
|
||||||
return yes;
|
return yes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*\
|
|
||||||
* Issue #65 - also see http://tidy.sf.net/issue/1642186
|
|
||||||
* Parser too gready over <script> blocks
|
|
||||||
*
|
|
||||||
* The idea is to scan the current lexer data, and
|
|
||||||
* return a Bool
|
|
||||||
* yes = we are in a javascript comment text, either type,
|
|
||||||
* or are within quotes, either single or double
|
|
||||||
* no = Not in any of the above.
|
|
||||||
*
|
|
||||||
* This is to avoid tidy finding tags in quoted or comment text.
|
|
||||||
*
|
|
||||||
* Controlled by option --skip-quotes yes|no, enum as
|
|
||||||
* TidySkipQuotes, off by default.
|
|
||||||
\*/
|
|
||||||
#define MX_TAG_BUFF 16
|
|
||||||
static Bool IsInQuotesorComment( Lexer * lexer )
|
|
||||||
{
|
|
||||||
unsigned int i, cnt, off;
|
|
||||||
Bool inq, toeol, toec;
|
|
||||||
unsigned char prev, quot, c;
|
|
||||||
tmbchar buff[MX_TAG_BUFF];
|
|
||||||
tmbstr pnc;
|
|
||||||
prev = quot = 0;
|
|
||||||
inq = toeol = toec = no;
|
|
||||||
cnt = 0;
|
|
||||||
off = 0;
|
|
||||||
for ( i = lexer->txtstart; i < lexer->lexsize; i++ )
|
|
||||||
{
|
|
||||||
pnc = &lexer->lexbuf[i];
|
|
||||||
c = *pnc;
|
|
||||||
if ( ! TY_(IsWhite)(c) ) {
|
|
||||||
if (c == '<') {
|
|
||||||
off = 0;
|
|
||||||
buff[off++] = c;
|
|
||||||
} else if (off && ((off + 1) < MX_TAG_BUFF)) {
|
|
||||||
buff[off++] = c;
|
|
||||||
if ( c == '[' ) {
|
|
||||||
buff[off] = 0;
|
|
||||||
if (TY_(tmbstrcmp)(buff,"<![CDATA[") == 0) {
|
|
||||||
/* Ignore **EVERYTHING** until "]]>" */
|
|
||||||
i++;
|
|
||||||
off = 0;
|
|
||||||
for ( ; i <= lexer->lexsize - 3; i++ )
|
|
||||||
{
|
|
||||||
pnc = &lexer->lexbuf[i];
|
|
||||||
if (*pnc == ']' && *(pnc + 1) == ']' && *(pnc + 2) == '>')
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cnt++;
|
|
||||||
}
|
|
||||||
if ( toeol )
|
|
||||||
{
|
|
||||||
/* continue until END OF LINE */
|
|
||||||
if ( c == '\n' )
|
|
||||||
{
|
|
||||||
toeol = no;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if ( toec )
|
|
||||||
{
|
|
||||||
/* continue until END OF COMMENT */
|
|
||||||
if ( ( c == '/' ) && ( prev == '*' ) )
|
|
||||||
toec = no;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (( c == '\\' ) && ( prev == '\\' )) {
|
|
||||||
prev = 0; /* use up the escape */
|
|
||||||
continue; /* 5.1.12 #65 (1642186): Ign 'escaped' escape ;=)) eg "+\\" */
|
|
||||||
}
|
|
||||||
if (( prev != '\\' ) && (( c == '"' ) || ( c == '\'')) )
|
|
||||||
{
|
|
||||||
/* deal with 'unescaped' quote chars " or ' */
|
|
||||||
if ( inq && ( c == quot ))
|
|
||||||
{
|
|
||||||
inq = no;
|
|
||||||
}
|
|
||||||
else if ( !inq ) /* 20150919: Oops no new 'quote' if already in a 'quote' */
|
|
||||||
{
|
|
||||||
inq = yes;
|
|
||||||
quot = c; /* keep type of start quote - single or double */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if ( !inq && ( c == '/' ) && (prev == '/') )
|
|
||||||
{
|
|
||||||
/* except, if we have "//<!CDATA..." or "//]]>" */
|
|
||||||
if (cnt > 2)
|
|
||||||
toeol = yes; /* set in comment, until END OF LINE */
|
|
||||||
}
|
|
||||||
else if ( !inq && ( c == '*' ) && (prev == '/'))
|
|
||||||
{
|
|
||||||
toec = yes; /* set until END OF COMMENT */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
prev = c;
|
|
||||||
}
|
|
||||||
return (inq | toeol | toec);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
create a text node for the contents of
|
create a text node for the contents of
|
||||||
a CDATA element like style or script
|
a CDATA element like style or script
|
||||||
|
@ -2101,8 +1996,11 @@ static Node *GetCDATA( TidyDocImpl* doc, Node *container )
|
||||||
Bool matches = no;
|
Bool matches = no;
|
||||||
uint c;
|
uint c;
|
||||||
Bool hasSrc = (TY_(AttrGetById)(container, TidyAttr_SRC) != NULL) ? yes : no;
|
Bool hasSrc = (TY_(AttrGetById)(container, TidyAttr_SRC) != NULL) ? yes : no;
|
||||||
Bool skipquotes = (nodeIsSCRIPT(container) &&
|
/*\ Issue #65 (1642186) and #280 - is script or style, and the option on
|
||||||
cfgBool(doc, TidySkipQuotes)) ? yes : no; /* #65 (1642186) - is script, and on */
|
* If yes, then avoid incrementing nested...
|
||||||
|
\*/
|
||||||
|
Bool nonested = ((nodeIsSCRIPT(container) || (nodeIsSTYLE(container))) &&
|
||||||
|
cfgBool(doc, TidySkipQuotes)) ? yes : no;
|
||||||
|
|
||||||
SetLexerLocus( doc, lexer );
|
SetLexerLocus( doc, lexer );
|
||||||
lexer->waswhite = no;
|
lexer->waswhite = no;
|
||||||
|
@ -2123,13 +2021,6 @@ static Node *GetCDATA( TidyDocImpl* doc, Node *container )
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*\
|
|
||||||
* Issue #65 - sf 1642186 - try to skip "...", '...', // ...\n, and
|
|
||||||
* other C/C++ like comment blocks, if the option enabled
|
|
||||||
\*/
|
|
||||||
if ( skipquotes && IsInQuotesorComment(lexer) )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
c = TY_(ReadChar)(doc->docIn);
|
c = TY_(ReadChar)(doc->docIn);
|
||||||
|
|
||||||
if (TY_(IsLetter)(c))
|
if (TY_(IsLetter)(c))
|
||||||
|
@ -2202,7 +2093,7 @@ static Node *GetCDATA( TidyDocImpl* doc, Node *container )
|
||||||
|
|
||||||
matches = TY_(tmbstrncasecmp)(container->element, lexer->lexbuf + start,
|
matches = TY_(tmbstrncasecmp)(container->element, lexer->lexbuf + start,
|
||||||
TY_(tmbstrlen)(container->element)) == 0;
|
TY_(tmbstrlen)(container->element)) == 0;
|
||||||
if (matches)
|
if (matches && !nonested)
|
||||||
nested++;
|
nested++;
|
||||||
|
|
||||||
state = CDATA_INTERMEDIATE;
|
state = CDATA_INTERMEDIATE;
|
||||||
|
|
Loading…
Reference in a new issue