Eventually complete a 2007 fix
This commit is contained in:
parent
cd8dca2d4c
commit
d541405a2a
|
@ -205,6 +205,7 @@ typedef enum
|
||||||
TidyMergeSpans, /**< Merge multiple SPANs */
|
TidyMergeSpans, /**< Merge multiple SPANs */
|
||||||
TidyAnchorAsName, /**< Define anchors as name attributes */
|
TidyAnchorAsName, /**< Define anchors as name attributes */
|
||||||
TidyPPrintTabs, /**< Indent using tabs instead of spaces */
|
TidyPPrintTabs, /**< Indent using tabs instead of spaces */
|
||||||
|
TidySkipQuotes, /**< Skip quotes and comments in script CDATA */
|
||||||
N_TIDY_OPTIONS /**< Must be last */
|
N_TIDY_OPTIONS /**< Must be last */
|
||||||
} TidyOptionId;
|
} TidyOptionId;
|
||||||
|
|
||||||
|
|
|
@ -321,6 +321,7 @@ static const TidyOptionImpl option_defs[] =
|
||||||
{ TidyMergeSpans, MU, "merge-spans", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
|
{ TidyMergeSpans, MU, "merge-spans", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
|
||||||
{ TidyAnchorAsName, MU, "anchor-as-name", BL, yes, ParseBool, boolPicks },
|
{ TidyAnchorAsName, MU, "anchor-as-name", BL, yes, ParseBool, boolPicks },
|
||||||
{ TidyPPrintTabs, PP, "indent-with-tabs", BL, no, ParseTabs, boolPicks }, /* 20150515 - Issue #108 */
|
{ TidyPPrintTabs, PP, "indent-with-tabs", BL, no, ParseTabs, boolPicks }, /* 20150515 - Issue #108 */
|
||||||
|
{ TidySkipQuotes, MU, "skip-quotes", BL, no, ParseBool, boolPicks }, /* 1642186 - Issue #65 */
|
||||||
{ N_TIDY_OPTIONS, XX, NULL, XY, 0, NULL, NULL }
|
{ N_TIDY_OPTIONS, XX, NULL, XY, 0, NULL, NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
75
src/lexer.c
75
src/lexer.c
|
@ -1947,6 +1947,73 @@ static Bool ExpectsContent(Node *node)
|
||||||
return yes;
|
return yes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*\
|
||||||
|
* Issue #65 - also see http://tidy.sf.net/issue/1642186
|
||||||
|
* Parser too gready over <script> blocks
|
||||||
|
*
|
||||||
|
* The idea is to scan the current lexer data, and
|
||||||
|
* return a Bool
|
||||||
|
* yes = we are in a javascript comment text, either type,
|
||||||
|
* or are within quotes, either single or double
|
||||||
|
* no = Not in any of the above.
|
||||||
|
*
|
||||||
|
* This is to avoid tidy finding tags in quoted or comment text.
|
||||||
|
*
|
||||||
|
* Controlled by option --skip-quotes yes|no, enum as
|
||||||
|
* TidySkipQuotes, off by default.
|
||||||
|
\*/
|
||||||
|
static Bool IsInQuotesorComment( Lexer * lexer )
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
Bool inq, toeol, toec;
|
||||||
|
unsigned char prev, quot, c;
|
||||||
|
prev = quot = 0;
|
||||||
|
inq = toeol = toec = no;
|
||||||
|
for ( i = lexer->txtstart; i < lexer->lexsize; i++ )
|
||||||
|
{
|
||||||
|
c = lexer->lexbuf[i];
|
||||||
|
if ( toeol )
|
||||||
|
{
|
||||||
|
/* continue until END OF LINE */
|
||||||
|
if ( c == '\n' )
|
||||||
|
{
|
||||||
|
toeol = no;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( toec )
|
||||||
|
{
|
||||||
|
/* continue until END OF COMMENT */
|
||||||
|
if ( ( c == '/' ) && ( prev == '*' ) )
|
||||||
|
toec = no;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (( prev != '\\' ) && (( c == '"' ) || ( c == '\'')) )
|
||||||
|
{
|
||||||
|
if ( inq && ( c == quot ))
|
||||||
|
{
|
||||||
|
inq = no;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
inq = yes;
|
||||||
|
quot = c; /* keep type of start quote - single or double */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( !inq && ( c == '/' ) && (prev == '/') )
|
||||||
|
{
|
||||||
|
toeol = yes; /* set in comment, until END OF LINE */
|
||||||
|
}
|
||||||
|
else if ( !inq && ( c == '*' ) && (prev == '/'))
|
||||||
|
{
|
||||||
|
toec = yes; /* set until END OF COMMENT */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prev = c;
|
||||||
|
}
|
||||||
|
return (inq | toeol | toec);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
create a text node for the contents of
|
create a text node for the contents of
|
||||||
a CDATA element like style or script
|
a CDATA element like style or script
|
||||||
|
@ -1971,6 +2038,7 @@ static Node *GetCDATA( TidyDocImpl* doc, Node *container )
|
||||||
Bool matches = no;
|
Bool matches = no;
|
||||||
uint c;
|
uint c;
|
||||||
Bool hasSrc = TY_(AttrGetById)(container, TidyAttr_SRC) != NULL;
|
Bool hasSrc = TY_(AttrGetById)(container, TidyAttr_SRC) != NULL;
|
||||||
|
Bool skipquotes = cfgBool(doc, TidySkipQuotes); /* #65 - get CONFIG option */
|
||||||
|
|
||||||
SetLexerLocus( doc, lexer );
|
SetLexerLocus( doc, lexer );
|
||||||
lexer->waswhite = no;
|
lexer->waswhite = no;
|
||||||
|
@ -1991,6 +2059,13 @@ static Node *GetCDATA( TidyDocImpl* doc, Node *container )
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*\
|
||||||
|
* Issue #65 - sf 1642186 - try to skip "...", '...', // ...\n, and
|
||||||
|
* other C/C++ like comment blocks, if the option enabled
|
||||||
|
\*/
|
||||||
|
if ( skipquotes && IsInQuotesorComment(lexer) )
|
||||||
|
continue;
|
||||||
|
|
||||||
c = TY_(ReadChar)(doc->docIn);
|
c = TY_(ReadChar)(doc->docIn);
|
||||||
|
|
||||||
if (TY_(IsLetter)(c))
|
if (TY_(IsLetter)(c))
|
||||||
|
|
|
@ -925,6 +925,10 @@ static const TidyOptionDoc option_docs[] =
|
||||||
"Note TidyTabSize controls converting input tabs to spaces. Set to zero "
|
"Note TidyTabSize controls converting input tabs to spaces. Set to zero "
|
||||||
"to retain input tabs. "
|
"to retain input tabs. "
|
||||||
},
|
},
|
||||||
|
{TidySkipQuotes,
|
||||||
|
"This option specifies that Tidy should skip quotes, and comments "
|
||||||
|
"when parsing script data. "
|
||||||
|
},
|
||||||
{N_TIDY_OPTIONS,
|
{N_TIDY_OPTIONS,
|
||||||
NULL
|
NULL
|
||||||
}
|
}
|
||||||
|
|
7
test/input/cfg_1642186-1.txt
Normal file
7
test/input/cfg_1642186-1.txt
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
skip-quotes: yes
|
||||||
|
indent: auto
|
||||||
|
tidy-mark: no
|
||||||
|
clean: yes
|
||||||
|
logical-emphasis: yes
|
||||||
|
indent-attributes: yes
|
||||||
|
show-info: no
|
14
test/input/in_1642186-1.html
Normal file
14
test/input/in_1642186-1.html
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Issue #65 - Parser too greedy over script blocks</title>
|
||||||
|
<script>
|
||||||
|
/* the <script */
|
||||||
|
var m1 = "\"<script \"";
|
||||||
|
var m2 = '<script '; // <script
|
||||||
|
</script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -1,2 +1,2 @@
|
||||||
5.1.9
|
5.1.10
|
||||||
2015.09.10
|
2015.09.16
|
||||||
|
|
Loading…
Reference in a new issue