From 169bd38adf77f4ebf07dd10d42225e7972c38757 Mon Sep 17 00:00:00 2001 From: Marcos Caceres Date: Tue, 4 Oct 2016 14:29:06 +1100 Subject: [PATCH 1/9] Part 1 - Add basic infra for 'add-meta-charset' option --- include/tidyenum.h | 1 + src/config.c | 1 + src/language_en.h | 12 ++++++++++++ src/lexer.c | 10 ++++++++++ src/lexer.h | 3 +++ src/tidylib.c | 4 ++++ 6 files changed, 31 insertions(+) diff --git a/include/tidyenum.h b/include/tidyenum.h index f494afb..d3c34f3 100644 --- a/include/tidyenum.h +++ b/include/tidyenum.h @@ -172,6 +172,7 @@ typedef enum TidySkipNested, /**< Skip nested tags in script and style CDATA */ TidyStrictTagsAttr, /**< Ensure tags and attributes match output HTML version */ TidyEscapeScripts, /**< Escape items that look like closing tags in script tags */ + TidyMetaCharset, /**< Adds/checks/fixes meta charset in the head, based on document type */ N_TIDY_OPTIONS /**< Must be last */ } TidyOptionId; diff --git a/src/config.c b/src/config.c index ddb677c..f040c96 100644 --- a/src/config.c +++ b/src/config.c @@ -324,6 +324,7 @@ static const TidyOptionImpl option_defs[] = { TidySkipNested, MU, "skip-nested", BL, yes, ParseBool, boolPicks }, /* 1642186 - Issue #65 */ { TidyStrictTagsAttr, MU, "strict-tags-attributes", BL, no, ParseBool, boolPicks }, /* 20160209 - Issue #350 */ { TidyEscapeScripts, PP, "escape-scripts", BL, yes, ParseBool, boolPicks }, /* 20160227 - Issue #348 */ + { TidyMetaCharset, MS, "add-meta-charset", BL, yes, ParseBool, boolPicks }, /* 20161004 - Issue #456 */ { N_TIDY_OPTIONS, XX, NULL, XY, 0, NULL, NULL } }; diff --git a/src/language_en.h b/src/language_en.h index 7a8d1de..316ec09 100644 --- a/src/language_en.h +++ b/src/language_en.h @@ -2080,6 +2080,18 @@ static languageDefinition language_en = { whichPluralForm_en, { "This option causes items that look like closing tags, like </g to be escaped " "to <\\/g. Set this option to 'no' if you do not want this." 
}, + {/* Important notes for translators: + - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and +
<br/>. + - Entities, tags, attributes, etc., should be enclosed in <code></code>. + - Option values should be enclosed in <var></var>. + - It's very important that <br/>
be self-closing! + - The strings "Tidy" and "HTML Tidy" are the program name and must not + be translated. */ + TidyMetaCharset, 0, + "This option adds a meta element and sets the charset attribute to the encoding of the document." + "Set this option to 'yes' if you want this." + }, /******************************************************** ** Console Application diff --git a/src/lexer.c b/src/lexer.c index 6c22085..ffc4394 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1674,6 +1674,16 @@ Node *TY_(FindBody)( TidyDocImpl* doc ) return node; } +/* Check meta charset*/ +Bool TY_(TidyMetaCharset)( TidyDocImpl* doc ) +{ + AttVal *attval; + Node *node; + Node *head = TY_(FindHEAD)( doc ); + printf("hello"); + return no; +} + /* add meta element for Tidy */ Bool TY_(AddGenerator)( TidyDocImpl* doc ) { diff --git a/src/lexer.h b/src/lexer.h index 0c8d5bb..e390e7a 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -491,6 +491,9 @@ Node* TY_(FindXmlDecl)(TidyDocImpl* doc); /* Returns containing block element, if any */ Node* TY_(FindContainer)( Node* node ); +/* Adds meta element and sets the charset */ +Bool TY_(TidyMetaCharset)( TidyDocImpl* doc ); + /* add meta element for Tidy */ Bool TY_(AddGenerator)( TidyDocImpl* doc ); diff --git a/src/tidylib.c b/src/tidylib.c index 4787336..4753ab1 100755 --- a/src/tidylib.c +++ b/src/tidylib.c @@ -1795,6 +1795,7 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc ) Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); Bool xmlDecl = cfgBool( doc, TidyXmlDecl ); Bool tidyMark = cfgBool( doc, TidyMark ); + Bool tidyMetaCharset = cfgBool( doc, TidyMetaCharset); Bool tidyXmlTags = cfgBool( doc, TidyXmlTags ); Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName ); Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis ); @@ -1898,6 +1899,9 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc ) if (tidyMark ) TY_(AddGenerator)(doc); + + if (tidyMetaCharset) + TY_(TidyMetaCharset)(doc); } /* ensure presence of initial */ From 
040c22c6dc26a81d30832ebcc5be91baf983dd49 Mon Sep 17 00:00:00 2001 From: Marcos Caceres Date: Tue, 4 Oct 2016 16:13:05 +1100 Subject: [PATCH 2/9] Part 2 - Implement lexer logic --- src/lexer.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 6 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index ffc4394..2b73604 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1674,14 +1674,98 @@ Node *TY_(FindBody)( TidyDocImpl* doc ) return node; } -/* Check meta charset*/ -Bool TY_(TidyMetaCharset)( TidyDocImpl* doc ) +/* Check meta charset + 1. if there is no meta charset, it adds one. + 2. if there is a meta charset, it moves it to the top if HEAD. + 3. if it doesn't match the output encoding, warn about that. + 4. if there are duplicates, discard them. + */ +Bool TY_(TidyMetaCharset)(TidyDocImpl* doc) { - AttVal *attval; - Node *node; Node *head = TY_(FindHEAD)( doc ); - printf("hello"); - return no; + ctmbstr enc = TY_(GetEncodingNameFromTidyId)(cfg(doc, TidyOutCharEncoding)); + Bool charsetFound = no; + // We can't do anything we don't have a head or encoding is NULL + if( !head || !enc ) + return no; + + for (Node *node = head->content; node; node = node->next) + { + if (!nodeIsMETA(node)) + continue; + AttVal *charsetAttr = TY_(AttrGetById)(node, TidyAttr_CHARSET); + AttVal *httpEquivAttr = TY_(AttrGetById)(node, TidyAttr_HTTP_EQUIV); + if(!charsetAttr && !httpEquivAttr) + continue; + + // Meta charset comes in quite a few flavors: + // 1. - expected for (X)HTML5. 
+ if (charsetAttr && !httpEquivAttr) + { + // we already found one + if(charsetFound) + { + TY_(DiscardElement)( doc, node ); + printf("WARNING ABOUT DISCARDING ELEMENT \n"); + continue; + } + charsetFound = yes; + tmbstr lCharset = TY_(tmbstrtolower)(charsetAttr->value); + if(strcmp(lCharset, enc) == 0) + { + // Move it to head + TY_(RemoveNode)( node ); + TY_(InsertNodeAtStart)( head, node ); + } + else + { + printf("WARN ABOUT MISMATCH: %s not match output %s \n", lCharset, enc); + TY_(RemoveNode)( node ); + TY_(InsertNodeAtStart)( head, node ); + } + continue; + } + + // 2. + // expected for HTML4. This is normally ok - but can clash. + if(httpEquivAttr && !charsetAttr) + { + AttVal *contentAttr = TY_(AttrGetById)(node, TidyAttr_CONTENT); + tmbstr lvalue = TY_(tmbstrtolower)(httpEquivAttr->value); + if(!contentAttr || strcmp(lvalue, "content-type") != 0) + continue; + tmbstr lcontent = TY_(tmbstrtolower)(contentAttr->value); + char expected[sizeof(enc) + 8] = "charset="; + strcat(expected, enc); + if(TY_(tmbsubstr)(lcontent, expected)){ + printf("WARN ABOUT CLASH: %s \n", contentAttr->value); + } + } + // 3. + // This is generally bad. + if(httpEquivAttr && charsetAttr) + { + printf("WARN ABOUT HTTP EQUIV AND CHARSET ATTR! 
\n"); + } + } + if(charsetFound){ + return yes; + } + Node *node = TY_(InferredTag)(doc, TidyTag_META); + switch(TY_(HTMLVersion)(doc)) + { + case HT50: + case XH50: + TY_(AddAttribute)( doc, node, "charset", enc); + break; + default: + TY_(AddAttribute)( doc, node, "http-equiv", "content-type"); + TY_(AddAttribute)( doc, node, "content", "text/html; charset="); + AttVal *contentAttr = TY_(AttrGetById)(node, TidyAttr_CONTENT); + TY_(tmbstrcat)(contentAttr->value, enc); + } + TY_(InsertNodeAtStart)( head, node ); + return yes; } /* add meta element for Tidy */ From cfc22ac46e4876170481a882ae3efa2641bbfb20 Mon Sep 17 00:00:00 2001 From: Marcos Caceres Date: Wed, 5 Oct 2016 18:54:25 +1100 Subject: [PATCH 3/9] Add garvankeeley's suggestions using calloc --- src/lexer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lexer.c b/src/lexer.c index 2b73604..568ac36 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1735,11 +1735,14 @@ Bool TY_(TidyMetaCharset)(TidyDocImpl* doc) if(!contentAttr || strcmp(lvalue, "content-type") != 0) continue; tmbstr lcontent = TY_(tmbstrtolower)(contentAttr->value); - char expected[sizeof(enc) + 8] = "charset="; + char* charsetString = "charset="; + char* expected = calloc(strlen(enc) + strlen(charsetString) + 1, sizeof(char*)); + strcat(expected, charsetString); strcat(expected, enc); if(TY_(tmbsubstr)(lcontent, expected)){ printf("WARN ABOUT CLASH: %s \n", contentAttr->value); } + free(expected); } // 3. // This is generally bad. 
From 2d7ddfef94eacc5f100cdc55887b96b17e746b95 Mon Sep 17 00:00:00 2001 From: Marcos Caceres Date: Wed, 5 Oct 2016 20:14:18 +1100 Subject: [PATCH 4/9] Part 2.1 - Bug fixes and warning --- src/lexer.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index 568ac36..882522c 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1697,29 +1697,31 @@ Bool TY_(TidyMetaCharset)(TidyDocImpl* doc) AttVal *httpEquivAttr = TY_(AttrGetById)(node, TidyAttr_HTTP_EQUIV); if(!charsetAttr && !httpEquivAttr) continue; - // Meta charset comes in quite a few flavors: // 1. - expected for (X)HTML5. if (charsetAttr && !httpEquivAttr) { - // we already found one + // we already found one, so remove the rest. if(charsetFound) { + Node *prevNode = node->prev; + TY_(ReportError)(doc, head, node, DISCARDING_UNEXPECTED); TY_(DiscardElement)( doc, node ); - printf("WARNING ABOUT DISCARDING ELEMENT \n"); + node = prevNode; continue; } charsetFound = yes; tmbstr lCharset = TY_(tmbstrtolower)(charsetAttr->value); - if(strcmp(lCharset, enc) == 0) + // Fix mismatched attribute value + if(strcmp(lCharset, enc) != 0) { - // Move it to head - TY_(RemoveNode)( node ); - TY_(InsertNodeAtStart)( head, node ); + tmbstr newValue = (tmbstr) TidyDocAlloc( doc, TY_(tmbstrlen)(enc) ); + TY_(tmbstrcpy)( newValue, enc ); + charsetAttr->value = newValue; + TY_(ReportError)( doc, head, node, BAD_ATTRIBUTE_VALUE_REPLACED ); } - else - { - printf("WARN ABOUT MISMATCH: %s not match output %s \n", lCharset, enc); + // Make sure it's the first element. 
+ if ( node != head->next ){ TY_(RemoveNode)( node ); TY_(InsertNodeAtStart)( head, node ); } From b1629c4a4f5f6eb1462048aa74e793e045b92ed1 Mon Sep 17 00:00:00 2001 From: Marcos Caceres Date: Wed, 5 Oct 2016 20:22:19 +1100 Subject: [PATCH 5/9] fix(lexer): bad attribute reporting --- src/lexer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.c b/src/lexer.c index 882522c..91e0535 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1717,8 +1717,8 @@ Bool TY_(TidyMetaCharset)(TidyDocImpl* doc) { tmbstr newValue = (tmbstr) TidyDocAlloc( doc, TY_(tmbstrlen)(enc) ); TY_(tmbstrcpy)( newValue, enc ); + TY_(ReportAttrError)( doc, node, charsetAttr, BAD_ATTRIBUTE_VALUE_REPLACED ); charsetAttr->value = newValue; - TY_(ReportError)( doc, head, node, BAD_ATTRIBUTE_VALUE_REPLACED ); } // Make sure it's the first element. if ( node != head->next ){ From 53ee94ddbaabb5e222eac68dddc6492fbcf7a6e3 Mon Sep 17 00:00:00 2001 From: Marcos Caceres Date: Thu, 6 Oct 2016 19:07:44 +1100 Subject: [PATCH 6/9] fix: incorrect check for first element in head --- src/lexer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.c b/src/lexer.c index 91e0535..24dced8 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1721,7 +1721,7 @@ Bool TY_(TidyMetaCharset)(TidyDocImpl* doc) charsetAttr->value = newValue; } // Make sure it's the first element. 
- if ( node != head->next ){ + if ( node != head->content->next ){ TY_(RemoveNode)( node ); TY_(InsertNodeAtStart)( head, node ); } From 932cc104a676a8d0179830e7bf68f1e53a1d3ff3 Mon Sep 17 00:00:00 2001 From: Marcos Caceres Date: Thu, 6 Oct 2016 19:29:56 +1100 Subject: [PATCH 7/9] feat(attrask.c): learn about charset attr --- src/attrask.c | 4 ++++ src/attrget.c | 4 ++++ src/attrs.h | 2 ++ 3 files changed, 10 insertions(+) diff --git a/src/attrask.c b/src/attrask.c index 92c75d3..0528233 100644 --- a/src/attrask.c +++ b/src/attrask.c @@ -193,6 +193,10 @@ Bool TIDY_CALL tidyAttrIsROWSPAN( TidyAttr tattr ) { return attrIsROWSPAN( tidyAttrToImpl(tattr) ); } +Bool TIDY_CALL tidyAttrIsCHARSET( TidyAttr tattr ) +{ + return attrIsCHARSET( tidyAttrToImpl(tattr) ); +} /* * local variables: diff --git a/src/attrget.c b/src/attrget.c index 6562cc2..428620f 100644 --- a/src/attrget.c +++ b/src/attrget.c @@ -197,6 +197,10 @@ TidyAttr TIDY_CALL tidyAttrGetROWSPAN( TidyNode tnod ) { return tidyImplToAttr( attrGetROWSPAN( tidyNodeToImpl(tnod) ) ); } +TidyAttr TIDY_CALL tidyAttrGetCHARSET( TidyNode tnod ) +{ + return tidyImplToAttr( attrGetCHARSET( tidyNodeToImpl(tnod) ) ); +} /* * local variables: diff --git a/src/attrs.h b/src/attrs.h index e5b0fa9..0192efc 100644 --- a/src/attrs.h +++ b/src/attrs.h @@ -184,6 +184,7 @@ Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc); #define attrIsBOTTOMMARGIN(av) AttrIsId( av, TidyAttr_BOTTOMMARGIN ) #define attrIsCELLPADDING(av) AttrIsId( av, TidyAttr_CELLPADDING ) #define attrIsCELLSPACING(av) AttrIsId( av, TidyAttr_CELLSPACING ) +#define attrIsCHARSET(av) AttrIsId( av, TidyAttr_CHARSET ) #define attrIsCHAR(av) AttrIsId( av, TidyAttr_CHAR ) #define attrIsCHAROFF(av) AttrIsId( av, TidyAttr_CHAROFF ) #define attrIsCHARSET(av) AttrIsId( av, TidyAttr_CHARSET ) @@ -385,6 +386,7 @@ Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc); #define attrGetHEIGHT( nod ) TY_(AttrGetById)( nod, 
TidyAttr_HEIGHT ) #define attrGetFOR( nod ) TY_(AttrGetById)( nod, TidyAttr_FOR ) #define attrGetSELECTED( nod ) TY_(AttrGetById)( nod, TidyAttr_SELECTED ) +#define attrGetCHARSET( nod ) TY_(AttrGetById)( nod, TidyAttr_CHARSET ) #define attrGetCHECKED( nod ) TY_(AttrGetById)( nod, TidyAttr_CHECKED ) #define attrGetLANG( nod ) TY_(AttrGetById)( nod, TidyAttr_LANG ) #define attrGetTARGET( nod ) TY_(AttrGetById)( nod, TidyAttr_TARGET ) From 523d58b00448453040b4ad13d0b864f437b30da1 Mon Sep 17 00:00:00 2001 From: Marcos Caceres Date: Thu, 6 Oct 2016 19:30:23 +1100 Subject: [PATCH 8/9] refactor: ask for charset and http_equiv attrs --- src/lexer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index 24dced8..0a48e53 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1693,8 +1693,8 @@ Bool TY_(TidyMetaCharset)(TidyDocImpl* doc) { if (!nodeIsMETA(node)) continue; - AttVal *charsetAttr = TY_(AttrGetById)(node, TidyAttr_CHARSET); - AttVal *httpEquivAttr = TY_(AttrGetById)(node, TidyAttr_HTTP_EQUIV); + AttVal *charsetAttr = attrGetCHARSET(node); + AttVal *httpEquivAttr = attrGetHTTP_EQUIV(node); if(!charsetAttr && !httpEquivAttr) continue; // Meta charset comes in quite a few flavors: From aff76bec380ccb22701f772e9fba3f68bb414fb5 Mon Sep 17 00:00:00 2001 From: Marcos Caceres Date: Mon, 17 Oct 2016 17:00:58 +1100 Subject: [PATCH 9/9] fix(lexer.c): fixes from initial review --- src/lexer.c | 102 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 43 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index 0a48e53..7f3d683 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1682,72 +1682,86 @@ Node *TY_(FindBody)( TidyDocImpl* doc ) */ Bool TY_(TidyMetaCharset)(TidyDocImpl* doc) { - Node *head = TY_(FindHEAD)( doc ); - ctmbstr enc = TY_(GetEncodingNameFromTidyId)(cfg(doc, TidyOutCharEncoding)); + AttVal *charsetAttr; + AttVal *contentAttr; + AttVal *httpEquivAttr; Bool charsetFound = no; - // We can't do 
anything we don't have a head or encoding is NULL + ctmbstr enc = TY_(GetEncodingNameFromTidyId)(cfg(doc, TidyOutCharEncoding)); + Node *currentNode; + Node *head = TY_(FindHEAD)( doc ); + Node *metaTag; + Node *prevNode; + TidyBuffer buf; + TidyBuffer charsetString; + tmbstr httpEquivAttrValue; + tmbstr lcontent; + tmbstr newValue; + /* We can't do anything we don't have a head or encoding is NULL */ if( !head || !enc ) return no; - - for (Node *node = head->content; node; node = node->next) + tidyBufInit(&charsetString); + for (currentNode = head->content; currentNode; currentNode = currentNode->next) { - if (!nodeIsMETA(node)) + if (!nodeIsMETA(currentNode)) continue; - AttVal *charsetAttr = attrGetCHARSET(node); - AttVal *httpEquivAttr = attrGetHTTP_EQUIV(node); + charsetAttr = attrGetCHARSET(currentNode); + httpEquivAttr = attrGetHTTP_EQUIV(currentNode); if(!charsetAttr && !httpEquivAttr) continue; - // Meta charset comes in quite a few flavors: - // 1. - expected for (X)HTML5. + /* + Meta charset comes in quite a few flavors: + 1. - expected for (X)HTML5. + */ if (charsetAttr && !httpEquivAttr) { // we already found one, so remove the rest. 
if(charsetFound) { - Node *prevNode = node->prev; - TY_(ReportError)(doc, head, node, DISCARDING_UNEXPECTED); - TY_(DiscardElement)( doc, node ); - node = prevNode; + prevNode = currentNode->prev; + TY_(ReportError)(doc, head, currentNode, DISCARDING_UNEXPECTED); + TY_(DiscardElement)( doc, currentNode ); + currentNode = prevNode; continue; } charsetFound = yes; - tmbstr lCharset = TY_(tmbstrtolower)(charsetAttr->value); // Fix mismatched attribute value - if(strcmp(lCharset, enc) != 0) + if(TY_(tmbstrcmp)(TY_(tmbstrtolower)(charsetAttr->value), enc) != 0) { - tmbstr newValue = (tmbstr) TidyDocAlloc( doc, TY_(tmbstrlen)(enc) ); + newValue = (tmbstr) TidyDocAlloc( doc, TY_(tmbstrlen)(enc) ); TY_(tmbstrcpy)( newValue, enc ); - TY_(ReportAttrError)( doc, node, charsetAttr, BAD_ATTRIBUTE_VALUE_REPLACED ); + TY_(ReportAttrError)( doc, currentNode, charsetAttr, BAD_ATTRIBUTE_VALUE_REPLACED ); charsetAttr->value = newValue; } // Make sure it's the first element. - if ( node != head->content->next ){ - TY_(RemoveNode)( node ); - TY_(InsertNodeAtStart)( head, node ); + if ( currentNode != head->content->next ){ + TY_(RemoveNode)( currentNode ); + TY_(InsertNodeAtStart)( head, currentNode ); } continue; } - - // 2. - // expected for HTML4. This is normally ok - but can clash. + /* + 2. + expected for HTML4. This is normally ok - but can clash. 
+ */ if(httpEquivAttr && !charsetAttr) { - AttVal *contentAttr = TY_(AttrGetById)(node, TidyAttr_CONTENT); - tmbstr lvalue = TY_(tmbstrtolower)(httpEquivAttr->value); - if(!contentAttr || strcmp(lvalue, "content-type") != 0) + tidyBufClear(&charsetString); + tidyBufAppend(&charsetString, "charset=", 8); + tidyBufAppend(&charsetString, (char*)enc, TY_(tmbstrlen)( enc )); + contentAttr = TY_(AttrGetById)(currentNode, TidyAttr_CONTENT); + httpEquivAttrValue = TY_(tmbstrtolower)(httpEquivAttr->value); + + if(!contentAttr || TY_(tmbstrcmp)(httpEquivAttr->value, (tmbstr) "content-type") != 0) continue; - tmbstr lcontent = TY_(tmbstrtolower)(contentAttr->value); - char* charsetString = "charset="; - char* expected = calloc(strlen(enc) + strlen(charsetString) + 1, sizeof(char*)); - strcat(expected, charsetString); - strcat(expected, enc); - if(TY_(tmbsubstr)(lcontent, expected)){ + lcontent = TY_(tmbstrtolower)(contentAttr->value); + if(TY_(tmbsubstr)(lcontent, (ctmbstr) &charsetString)){ printf("WARN ABOUT CLASH: %s \n", contentAttr->value); } - free(expected); } - // 3. - // This is generally bad. + /* + 3. + This is generally bad. + */ if(httpEquivAttr && charsetAttr) { printf("WARN ABOUT HTTP EQUIV AND CHARSET ATTR! 
\n"); @@ -1756,20 +1770,22 @@ Bool TY_(TidyMetaCharset)(TidyDocImpl* doc) if(charsetFound){ return yes; } - Node *node = TY_(InferredTag)(doc, TidyTag_META); + metaTag = TY_(InferredTag)(doc, TidyTag_META); switch(TY_(HTMLVersion)(doc)) { case HT50: case XH50: - TY_(AddAttribute)( doc, node, "charset", enc); + TY_(AddAttribute)( doc, metaTag, "charset", enc); break; default: - TY_(AddAttribute)( doc, node, "http-equiv", "content-type"); - TY_(AddAttribute)( doc, node, "content", "text/html; charset="); - AttVal *contentAttr = TY_(AttrGetById)(node, TidyAttr_CONTENT); - TY_(tmbstrcat)(contentAttr->value, enc); + tidyBufInit(&buf); + tidyBufAppend(&buf, "text/html; charset=", 19); + tidyBufAppend(&buf, (char*)enc, TY_(tmbstrlen)(enc)); + TY_(AddAttribute)( doc, metaTag, "content", (char*)buf.bp); + tidyBufFree(&buf); } - TY_(InsertNodeAtStart)( head, node ); + TY_(InsertNodeAtStart)( head, metaTag ); + tidyBufFree(&charsetString); return yes; }