tidy-html5/console/tidy.c

2141 lines
64 KiB
C
Raw Normal View History

2011-11-17 02:44:16 +00:00
/*
tidy.c - HTML TidyLib command line driver
Copyright (c) 1998-2008 World Wide Web Consortium
(Massachusetts Institute of Technology, European Research
Consortium for Informatics and Mathematics, Keio University).
All Rights Reserved.
*/
2011-11-17 02:44:16 +00:00
#include "tidy.h"
#include "locale.h"
#if defined(_WIN32)
#include <windows.h> /* Force console to UTF8. */
#endif
#if !defined(NDEBUG) && defined(_MSC_VER)
2014-08-03 18:33:29 +00:00
#include "sprtf.h"
#endif
#ifndef SPRTF
#define SPRTF printf
#endif
2011-11-17 02:44:16 +00:00
#if defined(_WIN32)
/* Console output code page that tidy_cleanup() hands back to
 * SetConsoleOutputCP() on exit. */
static uint win_cp;
#endif

/* Stream for error output; starts out NULL (per the original note it is
 * later set to stderr). */
static FILE* errout = NULL;

/* static FILE* txtout = NULL; */  /* set to stdout */
/**
** Indicates whether or not two filenames are the same.
*/
2011-11-17 02:44:16 +00:00
static Bool samefile( ctmbstr filename1, ctmbstr filename2 )
{
#if FILENAMES_CASE_SENSITIVE
return ( strcmp( filename1, filename2 ) == 0 );
2011-11-17 02:44:16 +00:00
#else
return ( strcasecmp( filename1, filename2 ) == 0 );
2011-11-17 02:44:16 +00:00
#endif
}
/**
 ** Handles exit cleanup.
 ** On Windows this puts the console output code page back to the value
 ** saved in `win_cp`; on every other platform it does nothing.
 ** Declared with a real `(void)` prototype so the compiler can reject
 ** calls that pass arguments.
 */
static void tidy_cleanup( void )
{
#if defined(_WIN32)
    /* Restore original Windows code page. */
    SetConsoleOutputCP(win_cp);
#endif
}
/**
** Exits with an error in the event of an out of memory condition.
*/
2011-11-17 02:44:16 +00:00
static void outOfMemory(void)
{
fprintf(stderr, "%s", tidyLocalizedString(TC_STRING_OUT_OF_MEMORY));
exit(1);
2011-11-17 02:44:16 +00:00
}
/**
** Used by `print2Columns` and `print3Columns` to manage whitespace.
*/
2011-11-17 02:44:16 +00:00
static const char *cutToWhiteSpace(const char *s, uint offset, char *sbuf)
{
if (!s)
{
sbuf[0] = '\0';
return NULL;
}
else if (strlen(s) <= offset)
{
strcpy(sbuf,s);
sbuf[offset] = '\0';
return NULL;
}
else
{
uint j, l, n;
/* scan forward looking for newline */
j = 0;
while(j < offset && s[j] != '\n')
++j;
if ( j == offset ) {
/* scan backward looking for first space */
j = offset;
while(j && s[j] != ' ')
--j;
l = j;
n = j+1;
/* no white space */
if (j==0)
{
l = offset;
n = offset;
}
} else
{
l = j;
n = j+1;
}
strncpy(sbuf,s,l);
sbuf[l] = '\0';
return s+n;
}
}
/**
 ** Outputs one column of text, wrapping `c1` into rows of at most
 ** `l1` characters. `fmt` is a printf format taking one string.
 ** Exits via outOfMemory() if the row buffer cannot be allocated.
 */
static void print1Column( const char* fmt, uint l1, const char *c1 )
{
    const char *rest = c1;
    char *cell = malloc(l1 + 1);

    if (cell == NULL)
        outOfMemory();

    do
    {
        rest = cutToWhiteSpace(rest, l1, cell);
        printf(fmt, cell);
    } while (rest != NULL);

    free(cell);
}
/**
** Outputs two columns of text.
*/
2011-11-17 02:44:16 +00:00
static void print2Columns( const char* fmt, uint l1, uint l2,
const char *c1, const char *c2 )
2011-11-17 02:44:16 +00:00
{
const char *pc1=c1, *pc2=c2;
char *c1buf = (char *)malloc(l1+1);
char *c2buf = (char *)malloc(l2+1);
if (!c1buf) outOfMemory();
if (!c2buf) outOfMemory();
do
{
pc1 = cutToWhiteSpace(pc1, l1, c1buf);
pc2 = cutToWhiteSpace(pc2, l2, c2buf);
printf(fmt,
c1buf[0]!='\0'?c1buf:"",
c2buf[0]!='\0'?c2buf:"");
} while (pc1 || pc2);
free(c1buf);
free(c2buf);
2011-11-17 02:44:16 +00:00
}
/**
** Outputs three columns of text.
*/
2011-11-17 02:44:16 +00:00
static void print3Columns( const char* fmt, uint l1, uint l2, uint l3,
const char *c1, const char *c2, const char *c3 )
2011-11-17 02:44:16 +00:00
{
const char *pc1=c1, *pc2=c2, *pc3=c3;
char *c1buf = (char *)malloc(l1+1);
char *c2buf = (char *)malloc(l2+1);
char *c3buf = (char *)malloc(l3+1);
if (!c1buf) outOfMemory();
if (!c2buf) outOfMemory();
if (!c3buf) outOfMemory();
do
{
pc1 = cutToWhiteSpace(pc1, l1, c1buf);
pc2 = cutToWhiteSpace(pc2, l2, c2buf);
pc3 = cutToWhiteSpace(pc3, l3, c3buf);
printf(fmt,
c1buf[0]!='\0'?c1buf:"",
c2buf[0]!='\0'?c2buf:"",
c3buf[0]!='\0'?c3buf:"");
} while (pc1 || pc2 || pc3);
free(c1buf);
free(c2buf);
free(c3buf);
2011-11-17 02:44:16 +00:00
}
/**
 ** Format strings and decorations used in output.
 */

/* Row formats: two columns for command line help, three columns for
 * the configuration option listings. */
static const char helpfmt[] = " %-25.25s %-52.52s\n";
static const char fmt[] = "%-27.27s %-9.9s %-40.40s\n";
static const char valfmt[] = "%-27.27s %-9.9s %-39.39s\n";

/* Underline rulers; callers print only as many characters as the
 * heading they underline. */
static const char helpul[] = "-----------------------------------------------------------------";
static const char ul[] = "=================================================================";
/**
 ** Categories used to group the command line options in help output.
 ** The values double as indexes into `cmdopt_catname`.
 */
typedef enum
{
    CmdOptFileManip = 0,
    CmdOptProcDir,
    CmdOptCharEnc,
    CmdOptMisc,
    CmdOptXML,
    CmdOptCatLAST,                      /* one past the last category */
    CmdOptCatFIRST = CmdOptFileManip    /* where category loops start */
} CmdOptCategory;
/**
** This array contains headings that will be used in help ouput.
*/
2011-11-17 02:44:16 +00:00
static const struct {
ctmbstr mnemonic; /**< Used in XML as a class. */
uint key; /**< Key to fetch the localized string. */
2011-11-17 02:44:16 +00:00
} cmdopt_catname[] = {
{ "file-manip", TC_STRING_FILE_MANIP },
{ "process-directives", TC_STRING_PROCESS_DIRECTIVES },
{ "char-encoding", TC_STRING_CHAR_ENCODING },
{ "misc", TC_STRING_MISC },
{ "xml", TC_STRING_XML }
2011-11-17 02:44:16 +00:00
};
/**
** The struct and subsequent array keep the help output structured
** because we _also_ output all of this stuff as as XML.
*/
2011-11-17 02:44:16 +00:00
typedef struct {
CmdOptCategory cat; /**< Category */
ctmbstr name1; /**< Name */
uint key; /**< Key to fetch the localized description. */
uint subKey; /**< Secondary substitution key. */
ctmbstr eqconfig; /**< Equivalent configuration option */
ctmbstr name2; /**< Name */
ctmbstr name3; /**< Name */
2011-11-17 02:44:16 +00:00
} CmdOptDesc;
/* All instances of %s will be substituted with localized string
specified by the subKey field. */
2011-11-17 02:44:16 +00:00
static const CmdOptDesc cmdopt_defs[] = {
{ CmdOptFileManip, "-output <%s>", TC_OPT_OUTPUT, TC_LABEL_FILE, "output-file: <%s>", "-o <%s>" },
{ CmdOptFileManip, "-config <%s>", TC_OPT_CONFIG, TC_LABEL_FILE, NULL },
{ CmdOptFileManip, "-file <%s>", TC_OPT_FILE, TC_LABEL_FILE, "error-file: <%s>", "-f <%s>" },
{ CmdOptFileManip, "-modify", TC_OPT_MODIFY, 0, "write-back: yes", "-m" },
{ CmdOptProcDir, "-indent", TC_OPT_INDENT, 0, "indent: auto", "-i" },
{ CmdOptProcDir, "-wrap <%s>", TC_OPT_WRAP, TC_LABEL_COL, "wrap: <%s>", "-w <%s>" },
{ CmdOptProcDir, "-upper", TC_OPT_UPPER, 0, "uppercase-tags: yes", "-u" },
{ CmdOptProcDir, "-clean", TC_OPT_CLEAN, 0, "clean: yes", "-c" },
{ CmdOptProcDir, "-bare", TC_OPT_BARE, 0, "bare: yes", "-b" },
{ CmdOptProcDir, "-gdoc", TC_OPT_GDOC, 0, "gdoc: yes", "-g" },
{ CmdOptProcDir, "-numeric", TC_OPT_NUMERIC, 0, "numeric-entities: yes", "-n" },
{ CmdOptProcDir, "-errors", TC_OPT_ERRORS, 0, "markup: no", "-e" },
{ CmdOptProcDir, "-quiet", TC_OPT_QUIET, 0, "quiet: yes", "-q" },
{ CmdOptProcDir, "-omit", TC_OPT_OMIT, 0, "omit-optional-tags: yes" },
{ CmdOptProcDir, "-xml", TC_OPT_XML, 0, "input-xml: yes" },
{ CmdOptProcDir, "-asxml", TC_OPT_ASXML, 0, "output-xhtml: yes", "-asxhtml" },
{ CmdOptProcDir, "-ashtml", TC_OPT_ASHTML, 0, "output-html: yes" },
2011-11-17 02:44:16 +00:00
#if SUPPORT_ACCESSIBILITY_CHECKS
{ CmdOptProcDir, "-access <%s>", TC_OPT_ACCESS, TC_LABEL_LEVL, "accessibility-check: <%s>" },
2011-11-17 02:44:16 +00:00
#endif
{ CmdOptCharEnc, "-raw", TC_OPT_RAW, 0, NULL },
{ CmdOptCharEnc, "-ascii", TC_OPT_ASCII, 0, NULL },
{ CmdOptCharEnc, "-latin0", TC_OPT_LATIN0, 0, NULL },
{ CmdOptCharEnc, "-latin1", TC_OPT_LATIN1, 0, NULL },
2011-11-17 02:44:16 +00:00
#ifndef NO_NATIVE_ISO2022_SUPPORT
{ CmdOptCharEnc, "-iso2022", TC_OPT_ISO2022, 0, NULL },
2011-11-17 02:44:16 +00:00
#endif
{ CmdOptCharEnc, "-utf8", TC_OPT_UTF8, 0, NULL },
{ CmdOptCharEnc, "-mac", TC_OPT_MAC, 0, NULL },
{ CmdOptCharEnc, "-win1252", TC_OPT_WIN1252, 0, NULL },
{ CmdOptCharEnc, "-ibm858", TC_OPT_IBM858, 0, NULL },
2011-11-17 02:44:16 +00:00
#if SUPPORT_UTF16_ENCODINGS
{ CmdOptCharEnc, "-utf16le", TC_OPT_UTF16LE, 0, NULL },
{ CmdOptCharEnc, "-utf16be", TC_OPT_UTF16BE, 0, NULL },
{ CmdOptCharEnc, "-utf16", TC_OPT_UTF16, 0, NULL },
2011-11-17 02:44:16 +00:00
#endif
#if SUPPORT_ASIAN_ENCODINGS /* #431953 - RJ */
{ CmdOptCharEnc, "-big5", TC_OPT_BIG5, 0, NULL },
{ CmdOptCharEnc, "-shiftjis", TC_OPT_SHIFTJIS, 0, NULL },
2011-11-17 02:44:16 +00:00
#endif
{ CmdOptMisc, "-version", TC_OPT_VERSION, 0, NULL, "-v" },
{ CmdOptMisc, "-help", TC_OPT_HELP, 0, NULL, "-h", "-?" },
{ CmdOptMisc, "-help-config", TC_OPT_HELPCFG, 0, NULL },
{ CmdOptMisc, "-show-config", TC_OPT_SHOWCFG, 0, NULL },
{ CmdOptMisc, "-help-option <%s>", TC_OPT_HELPOPT, TC_LABEL_OPT, NULL },
{ CmdOptMisc, "-language <%s>", TC_OPT_LANGUAGE, TC_LABEL_LANG, "language: <%s>" },
{ CmdOptXML, "-xml-help", TC_OPT_XMLHELP, 0, NULL },
{ CmdOptXML, "-xml-config", TC_OPT_XMLCFG, 0, NULL },
{ CmdOptXML, "-xml-strings", TC_OPT_XMLSTRG, 0, NULL },
{ CmdOptXML, "-xml-error-strings", TC_OPT_XMLERRS, 0, NULL },
{ CmdOptXML, "-xml-options-strings", TC_OPT_XMLOPTS, 0, NULL },
{ CmdOptMisc, NULL, 0, 0, NULL }
2011-11-17 02:44:16 +00:00
};
/**
** Create a new string with a format and arguments.
*/
static tmbstr stringWithFormat( const ctmbstr fmt, ... )
{
va_list argList;
tmbstr result = NULL;
int len = 0;
va_start(argList, fmt);
len = vsnprintf( result, 0, fmt, argList );
va_end(argList);
if (!(result = malloc( len + 1) ))
outOfMemory();
va_start(argList, fmt);
vsnprintf( result, len + 1, fmt, argList);
va_end(argList);
return result;
}
/**
 ** Option names aren't localized, but the sample fields
 ** are, for example <file> should be <archivo> in Spanish.
 **
 ** Replaces name1, and every non-NULL one of name2, name3 and eqconfig,
 ** in `pos` with a newly allocated copy in which the localized string
 ** selected by pos->subKey has been substituted for %s. The caller
 ** owns the new strings and is responsible for freeing them.
 */
static void localize_option_names( CmdOptDesc *pos)
{
    ctmbstr placeholder = tidyLocalizedString(pos->subKey);

    pos->name1 = stringWithFormat(pos->name1, placeholder);

    if ( pos->name2 != NULL )
        pos->name2 = stringWithFormat(pos->name2, placeholder);

    if ( pos->name3 != NULL )
        pos->name3 = stringWithFormat(pos->name3, placeholder);

    if ( pos->eqconfig != NULL )
        pos->eqconfig = stringWithFormat(pos->eqconfig, placeholder);
}
/**
** Retrieve the options' names from the structure as a single
** string.
*/
2011-11-17 02:44:16 +00:00
static tmbstr get_option_names( const CmdOptDesc* pos )
{
tmbstr name;
uint len;
CmdOptDesc localPos = *pos;
localize_option_names( &localPos );
len = strlen(localPos.name1);
if (localPos.name2)
len += 2+strlen(localPos.name2);
if (localPos.name3)
len += 2+strlen(localPos.name3);
name = (tmbstr)malloc(len+1);
if (!name) outOfMemory();
strcpy(name, localPos.name1);
free((tmbstr)localPos.name1);
if (localPos.name2)
{
strcat(name, ", ");
strcat(name, localPos.name2);
free((tmbstr)localPos.name2);
}
if (localPos.name3)
{
strcat(name, ", ");
strcat(name, localPos.name3);
free((tmbstr)localPos.name3);
}
return name;
2011-11-17 02:44:16 +00:00
}
/**
** Escape a name for XML output.
*/
2011-11-17 02:44:16 +00:00
static tmbstr get_escaped_name( ctmbstr name )
{
tmbstr escpName;
char aux[2];
uint len = 0;
ctmbstr c;
for(c=name; *c!='\0'; ++c)
switch(*c)
{
case '<':
case '>':
len += 4;
break;
case '"':
len += 6;
break;
default:
len += 1;
break;
}
escpName = (tmbstr)malloc(len+1);
if (!escpName) outOfMemory();
escpName[0] = '\0';
aux[1] = '\0';
for(c=name; *c!='\0'; ++c)
switch(*c)
{
case '<':
strcat(escpName, "&lt;");
break;
case '>':
strcat(escpName, "&gt;");
break;
case '"':
strcat(escpName, "&quot;");
break;
default:
aux[0] = *c;
strcat(escpName, aux);
break;
}
return escpName;
2011-11-17 02:44:16 +00:00
}
/**
** Outputs a complete help option (text)
*/
2011-11-17 02:44:16 +00:00
static void print_help_option( void )
{
CmdOptCategory cat = CmdOptCatFIRST;
const CmdOptDesc* pos = cmdopt_defs;
for( cat=CmdOptCatFIRST; cat!=CmdOptCatLAST; ++cat)
{
ctmbstr name = tidyLocalizedString(cmdopt_catname[cat].key);
size_t len = strlen(name);
printf("%s\n", name );
printf("%*.*s\n", (int)len, (int)len, helpul );
for( pos=cmdopt_defs; pos->name1; ++pos)
{
tmbstr name;
if (pos->cat != cat)
continue;
name = get_option_names( pos );
print2Columns( helpfmt, 25, 52, name, tidyLocalizedString( pos->key ) );
free(name);
}
printf("\n");
}
2011-11-17 02:44:16 +00:00
}
/**
** Outputs an XML element for an option.
*/
2011-11-17 02:44:16 +00:00
static void print_xml_help_option_element( ctmbstr element, ctmbstr name )
{
tmbstr escpName;
if (!name)
return;
printf(" <%s>%s</%s>\n", element, escpName = get_escaped_name(name), element);
free(escpName);
2011-11-17 02:44:16 +00:00
}
/**
** Outputs a complete help option (XML)
*/
2011-11-17 02:44:16 +00:00
static void print_xml_help_option( void )
{
const CmdOptDesc* pos;
CmdOptDesc localPos;
for( pos=cmdopt_defs; pos->name1; ++pos)
{
localPos = *pos;
localize_option_names(&localPos);
printf(" <option class=\"%s\">\n", cmdopt_catname[pos->cat].mnemonic );
print_xml_help_option_element("name", localPos.name1);
print_xml_help_option_element("name", localPos.name2);
print_xml_help_option_element("name", localPos.name3);
print_xml_help_option_element("description", tidyLocalizedString( pos->key ) );
if (pos->eqconfig)
print_xml_help_option_element("eqconfig", localPos.eqconfig);
else
printf(" <eqconfig />\n");
printf(" </option>\n");
}
2011-11-17 02:44:16 +00:00
}
/**
** Provides the -xml-help service.
*/
2011-11-17 02:44:16 +00:00
static void xml_help( void )
{
printf( "<?xml version=\"1.0\"?>\n"
"<cmdline version=\"%s\">\n", tidyLibraryVersion());
print_xml_help_option();
printf( "</cmdline>\n" );
2011-11-17 02:44:16 +00:00
}
/**
 ** Returns the final name of the tidy executable: the part of `prog`
 ** that follows the last '/' or '\\'. A separator that is the very
 ** last character is ignored. The result points into `prog`; nothing
 ** is allocated.
 */
static ctmbstr get_final_name( ctmbstr prog )
{
    ctmbstr base = prog;
    ctmbstr scan;

    for (scan = prog; *scan != '\0'; ++scan)
    {
        if ((*scan == '/' || *scan == '\\') && scan[1] != '\0')
            base = scan + 1;
    }

    return base;
}
/**
** Handles the -help service.
*/
2011-11-17 02:44:16 +00:00
static void help( ctmbstr prog )
{
tmbstr title_line = NULL;
printf( tidyLocalizedString(TC_TXT_HELP_1), get_final_name(prog),tidyLibraryVersion() );
2011-11-17 02:44:16 +00:00
#ifdef PLATFORM_NAME
title_line = stringWithFormat( tidyLocalizedString(TC_TXT_HELP_2A), PLATFORM_NAME);
2011-11-17 02:44:16 +00:00
#else
title_line = stringWithFormat( tidyLocalizedString(TC_TXT_HELP_2B) );
2011-11-17 02:44:16 +00:00
#endif
printf( "%s\n", title_line );
printf("%*.*s\n", (int)strlen(title_line), (int)strlen(title_line), ul);
free( title_line );
printf( "\n");
print_help_option();
printf( "%s", tidyLocalizedString(TC_TXT_HELP_3) );
2011-11-17 02:44:16 +00:00
}
/**
** Utility to determine if an option is an AutoBool.
*/
2011-11-17 02:44:16 +00:00
static Bool isAutoBool( TidyOption topt )
{
TidyIterator pos;
ctmbstr def;
if ( tidyOptGetType( topt ) != TidyInteger)
return no;
pos = tidyOptGetPickList( topt );
while ( pos )
{
def = tidyOptGetNextPick( topt, &pos );
if (0==strcmp(def,"yes"))
return yes;
}
return no;
2011-11-17 02:44:16 +00:00
}
/**
 ** Returns the configuration category name for the
 ** specified configuration category id. This will be
 ** used as an XML class attribute value.
 **
 ** An id outside the range TidyMarkup..TidyInternalCategory is treated
 ** as a fatal error: a message is written to stderr and the process is
 ** aborted.
 */
static ctmbstr ConfigCategoryName( TidyConfigCategory id )
{
    if (id < TidyMarkup || id > TidyInternalCategory)
    {
        fprintf(stderr, tidyLocalizedString(TC_STRING_FATAL_ERROR), (int)id);
        fprintf(stderr, "\n");
        assert(0);
        abort();
    }

    return tidyLocalizedString(id);
}
/**
** Structure maintains a description of an option.
*/
2011-11-17 02:44:16 +00:00
typedef struct {
ctmbstr name; /**< Name */
ctmbstr cat; /**< Category */
uint catid; /**< Category ID */
ctmbstr type; /**< "String, ... */
ctmbstr vals; /**< Potential values. If NULL, use an external function */
ctmbstr def; /**< default */
tmbchar tempdefs[80]; /**< storage for default such as integer */
Bool haveVals; /**< if yes, vals is valid */
2011-11-17 02:44:16 +00:00
} OptionDesc;
typedef void (*OptionFunc)( TidyDoc, TidyOption, OptionDesc * );
/**
** Create OptionDesc "d" related to "opt"
*/
2011-11-17 02:44:16 +00:00
static
void GetOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d )
{
TidyOptionId optId = tidyOptGetId( topt );
TidyOptionType optTyp = tidyOptGetType( topt );
d->name = tidyOptGetName( topt );
d->cat = ConfigCategoryName( tidyOptGetCategory( topt ) );
d->catid = tidyOptGetCategory( topt );
d->vals = NULL;
d->def = NULL;
d->haveVals = yes;
/* Handle special cases first.
*/
switch ( optId )
{
case TidyDuplicateAttrs:
case TidySortAttributes:
case TidyNewline:
case TidyAccessibilityCheckLevel:
d->type = "enum";
d->vals = NULL;
d->def =
optId==TidyNewline ?
"<em>Platform dependent</em>"
:tidyOptGetCurrPick( tdoc, optId );
break;
case TidyDoctype:
d->type = "DocType";
d->vals = NULL;
{
ctmbstr sdef = NULL;
sdef = tidyOptGetCurrPick( tdoc, TidyDoctypeMode );
if ( !sdef || *sdef == '*' )
sdef = tidyOptGetValue( tdoc, TidyDoctype );
d->def = sdef;
}
break;
case TidyInlineTags:
case TidyBlockTags:
case TidyEmptyTags:
case TidyPreTags:
d->type = "Tag names";
d->vals = "tagX, tagY, ...";
d->def = NULL;
break;
case TidyCharEncoding:
case TidyInCharEncoding:
case TidyOutCharEncoding:
d->type = "Encoding";
d->def = tidyOptGetEncName( tdoc, optId );
if (!d->def)
d->def = "?";
d->vals = NULL;
break;
/* General case will handle remaining */
default:
switch ( optTyp )
{
case TidyBoolean:
d->type = "Boolean";
d->vals = "y/n, yes/no, t/f, true/false, 1/0";
d->def = tidyOptGetCurrPick( tdoc, optId );
break;
case TidyInteger:
if (isAutoBool(topt))
{
d->type = "AutoBool";
d->vals = "auto, y/n, yes/no, t/f, true/false, 1/0";
d->def = tidyOptGetCurrPick( tdoc, optId );
}
else
{
uint idef;
d->type = "Integer";
if ( optId == TidyWrapLen )
d->vals = "0 (no wrapping), 1, 2, ...";
else
d->vals = "0, 1, 2, ...";
idef = tidyOptGetInt( tdoc, optId );
sprintf(d->tempdefs, "%u", idef);
d->def = d->tempdefs;
}
break;
case TidyString:
d->type = "String";
d->vals = NULL;
d->haveVals = no;
d->def = tidyOptGetValue( tdoc, optId );
break;
}
}
2011-11-17 02:44:16 +00:00
}
/**
** Array holding all options. Contains a trailing sentinel.
*/
2011-11-17 02:44:16 +00:00
typedef struct {
TidyOption topt[N_TIDY_OPTIONS];
2011-11-17 02:44:16 +00:00
} AllOption_t;
/**
** A simple option comparator.
**/
static int cmpOpt(const void* e1_, const void *e2_)
2011-11-17 02:44:16 +00:00
{
const TidyOption* e1 = (const TidyOption*)e1_;
const TidyOption* e2 = (const TidyOption*)e2_;
return strcmp(tidyOptGetName(*e1), tidyOptGetName(*e2));
2011-11-17 02:44:16 +00:00
}
/**
 ** Returns options sorted.
 **
 ** Collects every option of `tdoc` into `tOption`, sorts them by name
 ** and stores a NULL sentinel after the last one.
 **
 ** The array has a fixed size, so collection stops once only the
 ** sentinel slot is left; options beyond that point would be dropped
 ** rather than written past the end of the array.
 **/
static void getSortedOption( TidyDoc tdoc, AllOption_t *tOption )
{
    const uint capacity = sizeof(tOption->topt) / sizeof(tOption->topt[0]);
    TidyIterator pos = tidyGetOptionList( tdoc );
    uint i = 0;

    /* Always keep one slot free for the sentinel. */
    while ( pos && i + 1 < capacity )
    {
        TidyOption topt = tidyGetNextOption( tdoc, &pos );
        tOption->topt[i] = topt;
        ++i;
    }
    tOption->topt[i] = NULL; /* sentinel */

    qsort(tOption->topt,
          i, /* there are i items, not including the sentinel */
          sizeof(tOption->topt[0]),
          cmpOpt);
}
/**
** An iterator for the sorted options.
**/
2011-11-17 02:44:16 +00:00
static void ForEachSortedOption( TidyDoc tdoc, OptionFunc OptionPrint )
{
AllOption_t tOption;
const TidyOption *topt;
getSortedOption( tdoc, &tOption );
for( topt = tOption.topt; *topt; ++topt)
{
OptionDesc d;
GetOption( tdoc, *topt, &d );
(*OptionPrint)( tdoc, *topt, &d );
}
2011-11-17 02:44:16 +00:00
}
/**
** An iterator for the unsorted options.
**/
2011-11-17 02:44:16 +00:00
static void ForEachOption( TidyDoc tdoc, OptionFunc OptionPrint )
{
TidyIterator pos = tidyGetOptionList( tdoc );
while ( pos )
{
TidyOption topt = tidyGetNextOption( tdoc, &pos );
OptionDesc d;
GetOption( tdoc, topt, &d );
(*OptionPrint)( tdoc, topt, &d );
}
2011-11-17 02:44:16 +00:00
}
/**
 ** Prints an option's allowed values as specified in its pick list,
 ** separated by ", ".
 **/
static void PrintAllowedValuesFromPick( TidyOption topt )
{
    TidyIterator pos = tidyOptGetPickList( topt );
    ctmbstr separator = "";  /* nothing in front of the first value */

    while ( pos )
    {
        ctmbstr pick;

        printf("%s", separator);
        pick = tidyOptGetNextPick( topt, &pos );
        printf("%s", pick);
        separator = ", ";
    }
}
/**
 ** Prints an option's allowed values: the fixed text in `d->vals`
 ** when there is one, otherwise the values from the option's pick
 ** list.
 **/
static void PrintAllowedValues( TidyOption topt, const OptionDesc *d )
{
    if (d->vals == NULL)
    {
        PrintAllowedValuesFromPick( topt );
        return;
    }

    printf( "%s", d->vals );
}
/**
 ** Prints for XML an option's <description>.
 ** When the option has no documentation an empty element is printed
 ** and a localized notice naming the option is written to stderr.
 **/
static void printXMLDescription( TidyDoc tdoc, TidyOption topt )
{
    ctmbstr doc = tidyOptGetDoc( tdoc, topt );

    if (doc != NULL)
    {
        printf(" <description>%s</description>\n", doc);
        return;
    }

    printf(" <description />\n");
    fprintf(stderr, tidyLocalizedString(TC_STRING_OPT_NOT_DOCUMENTED),
            tidyOptGetName( topt ));
    fprintf(stderr, "\n");
}
/**
 ** Prints for XML an option's <seealso> elements, one for each option
 ** linked from its documentation.
 **/
static void printXMLCrossRef( TidyDoc tdoc, TidyOption topt )
{
    TidyIterator link;

    for ( link = tidyOptGetDocLinksList(tdoc, topt); link; )
    {
        TidyOption related = tidyOptGetNextDocLinks(tdoc, &link );
        printf(" <seealso>%s</seealso>\n",tidyOptGetName(related));
    }
}
2017-03-08 21:53:59 +00:00
/**
 ** Prints for XML an option's <eqconsole> elements: the command line
 ** switches whose `eqconfig` text starts with "<option name>:".
 ** Prints an empty <eqconsole /> element when no switch matches.
 **
 ** Only the first matching entry of `cmdopt_defs` is used. All
 ** temporary strings (localized names and their escaped forms) are
 ** released before returning.
 **/
static void printXMLCrossRefEqConsole( TidyDoc tdoc, TidyOption topt )
{
    const CmdOptDesc* pos;
    const CmdOptDesc* hit = NULL;
    enum { sizeBuffer = 50 }; /* largest config name is 27 chars so far... */
    char buffer[sizeBuffer];
    size_t prefixLen;

    /* The prefix does not change while scanning, so build it once. */
    snprintf(buffer, sizeBuffer, "%s:", tidyOptGetName( topt ));
    prefixLen = strlen(buffer);

    for( pos=cmdopt_defs; pos->name1; ++pos)
    {
        if ( pos->eqconfig && (strncmp(buffer, pos->eqconfig, prefixLen) == 0) )
        {
            hit = pos;
            break;
        }
    }

    if ( hit )
    {
        CmdOptDesc localHit = *hit;
        ctmbstr names[3];
        uint i;

        localize_option_names( &localHit );
        names[0] = localHit.name1;
        names[1] = localHit.name2;
        names[2] = localHit.name3;

        for ( i = 0; i < 3; ++i )
        {
            if ( names[i] )
            {
                tmbstr escaped = get_escaped_name(names[i]);
                printf(" <eqconsole>%s</eqconsole>\n", escaped);
                free(escaped);
            }
        }

        /* Strings allocated by localize_option_names(); free(NULL) is
           a no-op for the ones that were not set. */
        free((tmbstr)localHit.name1);
        free((tmbstr)localHit.name2);
        free((tmbstr)localHit.name3);
        free((tmbstr)localHit.eqconfig);
    }
    else
        printf(" %s\n", " <eqconsole />");
}
/**
 ** Prints for XML an option: an <option> element with its name, type,
 ** default, example values, description and cross references.
 ** Options in TidyInternalCategory are skipped.
 **/
static void printXMLOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d )
{
    if ( tidyOptGetCategory(topt) == TidyInternalCategory )
        return;

    printf( " <option class=\"%s\">\n", d->cat );
    printf (" <name>%s</name>\n",d->name);
    printf (" <type>%s</type>\n",d->type);

    if (d->def == NULL)
        printf(" <default />\n");
    else
        printf(" <default>%s</default>\n",d->def);

    if (!d->haveVals)
    {
        printf(" <example />\n");
    }
    else
    {
        printf(" <example>");
        PrintAllowedValues( topt, d );
        printf("</example>\n");
    }

    printXMLDescription( tdoc, topt );
    printXMLCrossRef( tdoc, topt );
    printXMLCrossRefEqConsole( tdoc, topt );
    printf( " </option>\n" );
}
/**
** Handles the -xml-config service.
**/
2011-11-17 02:44:16 +00:00
static void XMLoptionhelp( TidyDoc tdoc )
{
printf( "<?xml version=\"1.0\"?>\n"
"<config version=\"%s\">\n", tidyLibraryVersion());
ForEachOption( tdoc, printXMLOption );
printf( "</config>\n" );
2011-11-17 02:44:16 +00:00
}
/**
 * Prints the Windows language names that Tidy recognizes,
 * using the specified format string.
 *
 * @param format  printf format that receives the Windows name and the
 *                POSIX name, in that order. When NULL, the layout
 *                "%-20s -> %s\n" is used.
 */
void tidyPrintWindowsLanguageNames( ctmbstr format )
{
    ctmbstr layout = format ? format : "%-20s -> %s\n";
    TidyIterator i = getWindowsLanguageList();

    while (i) {
        const tidyLocaleMapItem *item = getNextWindowsLanguage(&i);
        ctmbstr winName = TidyLangWindowsName( item );
        ctmbstr posixName = TidyLangPosixName( item );

        printf( layout, winName, posixName );
    }
}
/**
 * Prints the languages that are currently built into Tidy,
 * using the specified format string.
 *
 * @param format  printf format that receives one language name.
 *                When NULL, each name is printed on its own line.
 */
void tidyPrintTidyLanguageNames( ctmbstr format )
{
    ctmbstr layout = format ? format : "%s\n";
    TidyIterator i = getInstalledLanguageList();

    while (i) {
        ctmbstr item = getNextInstalledLanguage(&i);
        printf( layout, item );
    }
}
/**
 ** Retrieves allowed values from an option's pick list, joined into a
 ** single ", "-separated string. The returned string is allocated with
 ** malloc(); the caller is responsible for releasing it with free().
 */
static tmbstr GetAllowedValuesFromPick( TidyOption topt )
{
    static const char separator[] = ", ";
    const size_t sep_len = sizeof(separator) - 1;
    TidyIterator pos;
    ctmbstr pick;
    size_t total = 0;
    size_t n;
    Bool need_sep;
    tmbstr val;
    tmbstr out;

    /* First pass: measure the joined string. */
    need_sep = no;
    for ( pos = tidyOptGetPickList( topt ); pos; need_sep = yes )
    {
        if ( need_sep )
            total += sep_len;
        pick = tidyOptGetNextPick( topt, &pos );
        total += strlen( pick );
    }

    val = (tmbstr)malloc( total + 1 );
    if ( !val ) outOfMemory();

    /* Second pass: copy each pick behind a moving write cursor. */
    out = val;
    need_sep = no;
    for ( pos = tidyOptGetPickList( topt ); pos; need_sep = yes )
    {
        if ( need_sep )
        {
            memcpy( out, separator, sep_len );
            out += sep_len;
        }
        pick = tidyOptGetNextPick( topt, &pos );
        n = strlen( pick );
        memcpy( out, pick, n );
        out += n;
    }
    *out = '\0';

    return val;
}
/**
 ** Retrieves allowed values for an option. If the description record
 ** already carries a value string, a malloc()'d copy of it is returned;
 ** otherwise the string is built from the option's pick list. Either
 ** way the caller owns the result and must free() it.
 */
static tmbstr GetAllowedValues( TidyOption topt, const OptionDesc *d )
{
    size_t size;
    tmbstr copy;

    if ( !d->vals )
        return GetAllowedValuesFromPick( topt );

    size = strlen( d->vals ) + 1;   /* include the terminator */
    copy = (tmbstr)malloc( size );
    if ( !copy ) outOfMemory();
    memcpy( copy, d->vals, size );
    return copy;
}
/**
 ** Prints a single option as a three-column row (name, type, allowed
 ** values). Options in the internal category, and records whose name
 ** and type are both empty, produce no output.
 */
static void printOption( TidyDoc ARG_UNUSED(tdoc), TidyOption topt,
                         OptionDesc *d )
{
    tmbstr owned = NULL;    /* set only when the values were allocated here */
    ctmbstr shown;

    if ( tidyOptGetCategory( topt ) == TidyInternalCategory )
        return;

    if ( !*d->name && !*d->type )
        return;

    if ( !d->haveVals )
    {
        shown = "-";
    }
    else if ( d->vals != NULL )
    {
        shown = d->vals;
    }
    else
    {
        owned = GetAllowedValues( topt, d );
        shown = owned;
    }

    print3Columns( fmt, 27, 9, 40, d->name, d->type, shown );
    free( owned );
}
/**
** Handles the -help-config service.
*/
2011-11-17 02:44:16 +00:00
static void optionhelp( TidyDoc tdoc )
{
printf( "%s", tidyLocalizedString( TC_TXT_HELP_CONFIG ) );
printf( fmt,
tidyLocalizedString( TC_TXT_HELP_CONFIG_NAME ),
tidyLocalizedString( TC_TXT_HELP_CONFIG_TYPE ),
tidyLocalizedString( TC_TXT_HELP_CONFIG_ALLW ) );
printf( fmt, ul, ul, ul );
ForEachSortedOption( tdoc, printOption );
}
2011-11-17 02:44:16 +00:00
/**
 ** Cleans up the HTML-laden option descriptions for console
 ** output. It's just a simple HTML filtering/replacement function:
 ** `<name>` and `&name;` sequences are replaced with the matching entry
 ** of the substitution table (or dropped when there is none) and every
 ** other character is copied through unchanged.
 **
 ** Returns an allocated, NUL-terminated string that the caller must
 ** free(). Returns NULL when `description` is NULL or empty, when no
 ** output was produced, or when memory could not be allocated. If the
 ** markup cannot be parsed, an error line is printed and whatever was
 ** converted up to that point is returned.
 */
static tmbstr cleanup_description( ctmbstr description )
{
    /* Substitutions - this might be a good spot to introduce platform
       dependent definitions for colorized output on different terminals
       that support, for example, ANSI escape sequences. The assumption
       is made the Mac and Linux targets support ANSI colors, but even
       so debugger terminals may not. Note that the line-wrapping
       function also doesn't account for non-printing characters. */
    static const struct {
        ctmbstr tag;
        ctmbstr replacement;
    } replacements[] = {
        { "lt",      "<"          },
        { "gt",      ">"          },
        { "br/",     "\n\n"       },
#if defined(LINUX_OS) || defined(MAC_OS_X)
        { "code",    "\x1b[36m"   },
        { "/code",   "\x1b[0m"    },
        { "em",      "\x1b[4m"    },
        { "/em",     "\x1b[0m"    },
        { "strong",  "\x1b[31m"   },
        { "/strong", "\x1b[0m"    },
#endif
        /* MUST be last */
        { NULL,      NULL         },
    };

    /* State Machine Setup */
    typedef enum {
        s_DONE,
        s_DATA,
        s_WRITING,
        s_TAG_OPEN,
        s_TAG_NAME,
        s_ERROR,
        s_LAST /* MUST be last */
    } states;

    typedef enum {
        c_NIL,
        c_EOF,
        c_BRACKET_CLOSE,
        c_BRACKET_OPEN,
        c_OTHER
    } charstates;

    typedef enum {
        a_NIL,
        a_BUILD_NAME,
        a_CONSUME,
        a_EMIT,
        a_EMIT_SUBS,
        a_WRITE,
        a_ERROR
    } actions;

    typedef struct {
        states state;
        charstates charstate;
        actions action;
        states next_state;
    } transitionType;

    static const transitionType transitions[] = {
        { s_DATA,     c_EOF,           a_NIL,        s_DONE     },
        { s_DATA,     c_BRACKET_OPEN,  a_CONSUME,    s_TAG_OPEN },
        /* special case allows ; */
        { s_DATA,     c_BRACKET_CLOSE, a_EMIT,       s_WRITING  },
        { s_DATA,     c_OTHER,         a_EMIT,       s_WRITING  },
        { s_WRITING,  c_OTHER,         a_WRITE,      s_DATA     },
        { s_WRITING,  c_BRACKET_CLOSE, a_WRITE,      s_DATA     },
        { s_TAG_OPEN, c_EOF,           a_ERROR,      s_DONE     },
        { s_TAG_OPEN, c_OTHER,         a_NIL,        s_TAG_NAME },
        { s_TAG_NAME, c_BRACKET_OPEN,  a_ERROR,      s_DONE     },
        { s_TAG_NAME, c_EOF,           a_ERROR,      s_DONE     },
        { s_TAG_NAME, c_BRACKET_CLOSE, a_EMIT_SUBS,  s_WRITING  },
        { s_TAG_NAME, c_OTHER,         a_BUILD_NAME, s_TAG_NAME },
        { s_ERROR,    c_NIL,           a_ERROR,      s_DONE     },
        { s_DONE,     c_NIL,           a_NIL,        s_DONE     },
        /* MUST be last: */
        { s_LAST,     c_NIL,           a_NIL,        s_DONE     },
    };

    /* Output Setup */
    tmbstr result = NULL;
    const size_t g_result = 100;  /* minimum buffer grow size */
    size_t l_result = 0;          /* buffer current size */
    size_t i_result = 0;          /* current string position */
    size_t writer_len = 0;        /* writer length */
    ctmbstr writer = NULL;        /* NULL means "write the current char" */

    /* Current tag name setup */
    tmbstr name = NULL;           /* tag name */
    const size_t g_name = 10;     /* buffer grow size */
    size_t l_name = 0;            /* buffer current size */
    size_t i_name = 0;            /* current string position */

    /* Pump Setup */
    size_t i = 0;
    states state = s_DATA;
    charstates charstate;
    char c;
    int j, k;
    int matched;
    tmbstr grown;
    const transitionType *transition;

    if ( !description || (strlen(description) < 1) )
    {
        return NULL;
    }

    /* Process the HTML Snippet */
    do {
        c = description[i];

        /* Determine secondary state. Entities are handled as if they
           were tags: `&` opens and `;` closes. */
        switch (c)
        {
            case '\0':
                charstate = c_EOF;
                break;

            case '<':
            case '&':
                charstate = c_BRACKET_OPEN;
                break;

            case '>':
            case ';':
                charstate = c_BRACKET_CLOSE;
                break;

            default:
                charstate = c_OTHER;
                break;
        }

        /* Find the correct instruction */
        matched = 0;
        for ( j = 0; transitions[j].state != s_LAST; ++j )
        {
            transition = &transitions[j];
            if ( transition->state != state || transition->charstate != charstate )
                continue;

            matched = 1;
            switch ( transition->action )
            {
                /* This action is building the name of an HTML tag.
                   One spare byte is always kept so that a_EMIT_SUBS
                   can terminate the name without overrunning it. */
                case a_BUILD_NAME:
                    if ( !name || i_name + 1 >= l_name )
                    {
                        grown = realloc( name, l_name + g_name );
                        if ( !grown )
                            goto ALLOC_FAILED;
                        name = grown;
                        l_name += g_name;
                    }
                    name[i_name++] = c;
                    i++;
                    break;

                /* This character will be emitted into the output
                   stream. The only purpose of this action is to
                   ensure that `writer` is NULL as a flag that we
                   will output the current `c` */
                case a_EMIT:
                    writer = NULL; /* flag to use c */
                    break;

                /* Now that we've consumed a tag, we will emit the
                   substitution if any has been specified in
                   `replacements`; unknown tags emit nothing. */
                case a_EMIT_SUBS:
                    writer = "";
                    if ( name )
                    {
                        name[i_name] = '\0';
                        for ( k = 0; replacements[k].tag; ++k )
                        {
                            if ( strcmp( replacements[k].tag, name ) == 0 )
                            {
                                writer = replacements[k].replacement;
                                break;
                            }
                        }
                    }
                    i_name = 0;
                    break;

                /* This action will add to our `result` string, expanding
                   the buffer as necessary in reasonable chunks. The
                   string is re-terminated after every write because
                   bytes added by realloc() are not zeroed. */
                case a_WRITE:
                    writer_len = writer ? strlen( writer ) : 1;

                    if ( !result || i_result + writer_len + 1 > l_result )
                    {
                        grown = realloc( result, l_result + writer_len + g_result );
                        if ( !grown )
                            goto ALLOC_FAILED;
                        result = grown;
                        l_result += writer_len + g_result;
                    }

                    if ( writer )
                        memcpy( result + i_result, writer, writer_len );
                    else
                        result[i_result] = c;

                    i_result += writer_len;
                    result[i_result] = '\0';
                    i++;
                    break;

                /* This action could be more robust but it serves the
                   current purpose. Cross our fingers and count on our
                   localizers not to give bad HTML descriptions. */
                case a_ERROR:
                    printf("<Error> The localized string probably has bad HTML.\n");
                    goto EXIT_CLEANLY;

                /* Just a NOP. */
                case a_NIL:
                    break;

                /* The default case also handles the CONSUME action. */
                default:
                    i++;
                    break;
            }

            state = transition->next_state;
            break;
        }

        /* No rule for this state/character pair (for example "<>" or
           "&&"). Nothing would advance `i`, so stop here instead of
           spinning forever. */
        if ( !matched )
        {
            printf("<Error> The localized string probably has bad HTML.\n");
            goto EXIT_CLEANLY;
        }
    } while ( description[i] );

    goto EXIT_CLEANLY;

ALLOC_FAILED:
    /* realloc() left the old blocks intact; drop the partial output. */
    free( result );
    result = NULL;

EXIT_CLEANLY:
    free( name );
    return result;
}
/**
 ** Handles the -help-option service: prints the option name followed by
 ** its cleaned-up description, or the localized "unknown option" text
 ** when `tag` does not name a public option.
 */
static void optionDescribe( TidyDoc tdoc, char *tag )
{
    tmbstr owned = NULL;   /* description allocated here, if any */
    tmbstr text;
    TidyOptionId topt = tidyOptGetIdForName( tag );
    uint tcat = tidyOptGetCategory( tidyGetOption(tdoc, topt));
    Bool is_public = ( topt < N_TIDY_OPTIONS && tcat != TidyInternalCategory );

    if ( is_public )
    {
        owned = cleanup_description( tidyOptGetDoc( tdoc, tidyGetOption( tdoc, topt ) ) );
        text = owned;
    }
    else
    {
        /* Library-owned string: printed but never freed. */
        text = (tmbstr)tidyLocalizedString(TC_STRING_UNKNOWN_OPTION_B);
    }

    printf( "\n" );
    printf( "`--%s`\n\n", tag );
    print1Column( "%-68.68s\n", 68, text );
    printf( "\n" );

    free( owned );
}
/**
 *  Prints the option value for a given option. Tag-list options print
 *  one row per declared tag, with the name and type columns blanked
 *  after the first row; options in the internal category print nothing.
 */
static void printOptionValues( TidyDoc ARG_UNUSED(tdoc), TidyOption topt,
                               OptionDesc *d )
{
    TidyOptionId optId = tidyOptGetId( topt );
    TidyIterator pos;

    if ( tidyOptGetCategory(topt) == TidyInternalCategory )
        return;

    if ( optId == TidyInlineTags || optId == TidyBlockTags ||
         optId == TidyEmptyTags  || optId == TidyPreTags )
    {
        for ( pos = tidyOptGetDeclTagList( tdoc ); pos; )
        {
            d->def = tidyOptGetNextDeclTag(tdoc, optId, &pos);
            if ( !pos )
                break;  /* the final value is printed by the code below */

            printf( *d->name ? valfmt : fmt, d->name, d->type, d->def );
            d->name = "";
            d->type = "";
        }
    }
    else if ( optId == TidyNewline )
    {
        d->def = tidyOptGetCurrPick( tdoc, optId );
    }

    /* fix for http://tidy.sf.net/bug/873921 */
    if ( !*d->name && !*d->type && !(d->def && *d->def) )
        return;

    if ( ! d->def )
        d->def = "";
    printf( *d->name ? valfmt : fmt, d->name, d->type, d->def );
}
/**
 ** Handles the -show-config service: prints a localized heading, the
 ** column titles and underline, then one row per option via
 ** `printOptionValues`.
 */
static void optionvalues( TidyDoc tdoc )
{
    ctmbstr title_name;
    ctmbstr title_type;
    ctmbstr title_value;

    printf( "\n%s\n\n", tidyLocalizedString(TC_STRING_CONF_HEADER) );

    title_name  = tidyLocalizedString(TC_STRING_CONF_NAME);
    title_type  = tidyLocalizedString(TC_STRING_CONF_TYPE);
    title_value = tidyLocalizedString(TC_STRING_CONF_VALUE);
    printf( fmt, title_name, title_type, title_value );
    printf( fmt, ul, ul, ul );

    ForEachSortedOption( tdoc, printOptionValues );
}
/**
** Handles the -version service.
*/
static void version( void )
2011-11-17 02:44:16 +00:00
{
#ifdef PLATFORM_NAME
printf( tidyLocalizedString( TC_STRING_VERS_A ), PLATFORM_NAME, tidyLibraryVersion() );
#else
printf( tidyLocalizedString( TC_STRING_VERS_B ), tidyLibraryVersion() );
#endif
printf("\n");
}
2011-11-17 02:44:16 +00:00
/**
 ** Handles the printing of option description for
 ** -xml-options-strings service. Read-only options are skipped.
 **/
static void printXMLOptionString( TidyDoc tdoc, TidyOption topt, OptionDesc *d )
{
    ctmbstr language;
    ctmbstr doc_text;

    if ( tidyOptIsReadOnly(topt) )
        return;

    printf( " <option>\n" );
    printf( " <name>%s</name>\n",d->name);

    language = tidyGetLanguage();
    doc_text = tidyOptGetDoc( tdoc, topt );
    printf( " <string class=\"%s\"><![CDATA[%s]]></string>\n", language, doc_text );

    printf( " </option>\n" );
}
/**
 ** Handles the -xml-options-strings service.
 ** This service is primarily helpful to developers and localizers to test
 ** that option description strings as represented on screen output are
 ** correct and do not break tidy.
 **/
static void xml_options_strings( TidyDoc tdoc )
{
    ctmbstr release = tidyLibraryVersion();

    /* XML prolog and opening root element. */
    printf( "<?xml version=\"1.0\"?>\n" );
    printf( "<options_strings version=\"%s\">\n", release );

    ForEachOption( tdoc, printXMLOptionString);

    printf( "</options_strings>\n" );
}
2011-11-17 02:44:16 +00:00
/**
 ** Handles the -xml-error-strings service.
 ** This service is primarily helpful to developers who need to generate
 ** an updated list of strings to expect when using `TidyReportFilter3`.
 ** Included in the output is the current string associated with the error
 ** symbol.
 **/
static void xml_error_strings( TidyDoc tdoc )
{
    TidyIterator cursor = getErrorCodeList();

    printf( "<?xml version=\"1.0\"?>\n" );
    printf( "<error_strings version=\"%s\">\n", tidyLibraryVersion());

    while ( cursor )
    {
        uint code = getNextErrorCode(&cursor);
        ctmbstr text = tidyLocalizedString(code);

        printf( " <error_string>\n" );
        printf( " <name>%s</name>\n", tidyErrorCodeAsKey(code));

        /* A code without a string is reported as a literal NULL. */
        if ( !text )
            printf( " <string class=\"%s\">NULL</string>\n", tidyGetLanguage() );
        else
            printf( " <string class=\"%s\"><![CDATA[%s]]></string>\n", tidyGetLanguage(), text );

        printf( " </error_string>\n" );
    }

    printf( "</error_strings>\n" );
}
/**
** Handles the -xml-strings service.
** This service was primarily helpful to developers and localizers to
** compare localized strings to the built in `en` strings. It's probably
** better to use our POT/PO workflow with your favorite tools, or simply
** diff the language header files directly.
** **Important:** The attribute `id` is not a specification, promise, or
Several foundational changes preparing for release of 5.4 and future 5.5: - Consolidated all output string definitions enums into `tidyenum.h`, which is where they belong, and where they have proper visibility. - Re-arranged `messages.c/h` with several comments useful to developers. - Properly added the key lookup functions and the language localization functions into tidy.h/tidylib.c with proper name-spacing. - Previous point restored a *lot* of sanity to the #include pollution that's been introduced in light of these. - Note that opaque types have been (properly) introduced. Look at the updated headers for `language.h`. In particular only an opaque structure is passed outside of LibTidy, and so use TidyLangWindowsName and TidyLangPosixName to poll these objects. - Console application updated as a result of this. - Removed dead code: - void TY_(UnknownOption)( TidyDocImpl* doc, char c ); - void TY_(UnknownFile)( TidyDocImpl* doc, ctmbstr program, ctmbstr file ); - Redundant strings were removed with the removal of this dead code. - Several enums were given fixed starting values. YOUR PROGRAMS SHOULD NEVER depend on enum values. `TidyReportLevel` is an example of such. - Some enums were removed as a result of this. `TidyReportLevel` now has matching strings, so the redundant `TidyReportLevelStrings` was removed. - All of the PO's and language header files were regenerated as a result of the string cleanup and header cleanup. - Made the interface to the library version and release date consistent. - CMakeLists.txt now supports SUPPORT_CONSOLE_APP. The intention is to be able to remove console-only code from LibTidy (for LibTidy users). - Updated README/MESSAGES.md, which is *vastly* more simple now.
2017-02-13 19:29:47 +00:00
** part of an API. You must not depend on this value. For strings meant
** for error output, the `label` attribute will contain the stringified
** version of the internal key for the string.
*/
static void xml_strings( void )
{
uint i;
TidyIterator j;
ctmbstr current_language = tidyGetLanguage();
Several foundational changes preparing for release of 5.4 and future 5.5: - Consolidated all output string definitions enums into `tidyenum.h`, which is where they belong, and where they have proper visibility. - Re-arranged `messages.c/h` with several comments useful to developers. - Properly added the key lookup functions and the language localization functions into tidy.h/tidylib.c with proper name-spacing. - Previous point restored a *lot* of sanity to the #include pollution that's been introduced in light of these. - Note that opaque types have been (properly) introduced. Look at the updated headers for `language.h`. In particular only an opaque structure is passed outside of LibTidy, and so use TidyLangWindowsName and TidyLangPosixName to poll these objects. - Console application updated as a result of this. - Removed dead code: - void TY_(UnknownOption)( TidyDocImpl* doc, char c ); - void TY_(UnknownFile)( TidyDocImpl* doc, ctmbstr program, ctmbstr file ); - Redundant strings were removed with the removal of this dead code. - Several enums were given fixed starting values. YOUR PROGRAMS SHOULD NEVER depend on enum values. `TidyReportLevel` is an example of such. - Some enums were removed as a result of this. `TidyReportLevel` now has matching strings, so the redundant `TidyReportLevelStrings` was removed. - All of the PO's and language header files were regenerated as a result of the string cleanup and header cleanup. - Made the interface to the library version and release date consistent. - CMakeLists.txt now supports SUPPORT_CONSOLE_APP. The intention is to be able to remove console-only code from LibTidy (for LibTidy users). - Updated README/MESSAGES.md, which is *vastly* more simple now.
2017-02-13 19:29:47 +00:00
ctmbstr current_label;
Bool skip_current = strcmp( current_language, "en" ) == 0;
Bool matches_base;
printf( "<?xml version=\"1.0\"?>\n"
"<localized_strings version=\"%s\">\n", tidyLibraryVersion());
j = getStringKeyList();
while (j) {
i = getNextStringKey(&j);
Several foundational changes preparing for release of 5.4 and future 5.5: - Consolidated all output string definitions enums into `tidyenum.h`, which is where they belong, and where they have proper visibility. - Re-arranged `messages.c/h` with several comments useful to developers. - Properly added the key lookup functions and the language localization functions into tidy.h/tidylib.c with proper name-spacing. - Previous point restored a *lot* of sanity to the #include pollution that's been introduced in light of these. - Note that opaque types have been (properly) introduced. Look at the updated headers for `language.h`. In particular only an opaque structure is passed outside of LibTidy, and so use TidyLangWindowsName and TidyLangPosixName to poll these objects. - Console application updated as a result of this. - Removed dead code: - void TY_(UnknownOption)( TidyDocImpl* doc, char c ); - void TY_(UnknownFile)( TidyDocImpl* doc, ctmbstr program, ctmbstr file ); - Redundant strings were removed with the removal of this dead code. - Several enums were given fixed starting values. YOUR PROGRAMS SHOULD NEVER depend on enum values. `TidyReportLevel` is an example of such. - Some enums were removed as a result of this. `TidyReportLevel` now has matching strings, so the redundant `TidyReportLevelStrings` was removed. - All of the PO's and language header files were regenerated as a result of the string cleanup and header cleanup. - Made the interface to the library version and release date consistent. - CMakeLists.txt now supports SUPPORT_CONSOLE_APP. The intention is to be able to remove console-only code from LibTidy (for LibTidy users). - Updated README/MESSAGES.md, which is *vastly* more simple now.
2017-02-13 19:29:47 +00:00
current_label = tidyErrorCodeAsKey(i);
if (!strcmp(current_label, "UNDEFINED"))
current_label = "";
printf( "<localized_string id=\"%u\" label=\"%s\">\n", i, current_label );
printf( " <string class=\"%s\">", "en" );
printf("%s", tidyDefaultString(i));
printf( "</string>\n" );
if ( !skip_current ) {
matches_base = strcmp( tidyLocalizedString(i), tidyDefaultString(i) ) == 0;
printf( " <string class=\"%s\" same_as_base=\"%s\">", tidyGetLanguage(), matches_base ? "yes" : "no" );
printf("%s", tidyLocalizedString(i));
printf( "</string>\n" );
}
printf( "</localized_string>\n");
}
printf( "</localized_strings>\n" );
2011-11-17 02:44:16 +00:00
}
/**
** Handles the -lang help service.
*/
static void lang_help( void )
2011-11-17 02:44:16 +00:00
{
printf( "%s", tidyLocalizedString(TC_TXT_HELP_LANG_1) );
tidyPrintWindowsLanguageNames(" %-20s -> %s\n");
printf( "%s", tidyLocalizedString(TC_TXT_HELP_LANG_2) );
tidyPrintTidyLanguageNames(" %s\n");
printf( tidyLocalizedString(TC_TXT_HELP_LANG_3), tidyGetLanguage() );
2011-11-17 02:44:16 +00:00
}
/**
** Provides the `unknown option` output.
*/
2011-11-17 02:44:16 +00:00
static void unknownOption( uint c )
{
fprintf( errout, tidyLocalizedString( TC_STRING_UNKNOWN_OPTION ), (char)c );
fprintf( errout, "\n");
2011-11-17 02:44:16 +00:00
}
Massive Revamp of the Messaging System This is a rather large refactoring of Tidy's messaging system. This was done mostly to allow non-C libraries that cannot adequately take advantage of arg_lists a chance to query report filter information for information related to arguments used in constructing an error message. Three main goals were in mind for this project: - Don't change the contents of Tidy's existing output sinks. This will ensure that changes do no affect console Tidy users, or LibTidy users who use the output sinks directly. This was accomplished 100% other than some improved cosmetics in the output. See tidy-html5-tests repository, the `refactor` and `more_messages_changes` branches for these minor diffs. - Provide an API that is simple and also extensible without having to write new error filters all the time. This was accomplished by adding the new message callback `TidyMessageCallback` that provides callback functions an opaque object representing the message, and an API to query the message for wanted details. With this, we should never have to add a new callback routine again, as additional API can simply be written against the opaque object. - The API should work the same as the rest of LibTidy's API in that it's consistent and only uses simple types with wide interoperability with other languages. Thanks to @gagern who suggested the model for the API in #409. Although the API uses the "Tidy" way off accessing data via an iterator rather than an index, this can be easily abstracted in the target language. There are two *major* API breaking changes: - Removed TidyReportFilter2 - This was only used by one application in the entire world, and was a hacky kludge that served its purpose. TidyReportCallback (né TidyReportFilter3) is much better. If, for some reason, this affects you, I recommend using TidyReportCallback instead. It's a minor change for your application. 
- Renamed TidyReportFilter3 to TidyReportCallback - This name is much more semantic, and much more sensible in light of improved callback system. As the name implies, it remains capable of *only* receiving callbacks for Tidy "reports." Introducing TidyMessageCallback, and a new message interrogation API. - As its name implies, it is able to capture (and optionally suppress) *all* of Tidy's output, including the dialogue messages that never make it to the existing report filters. - Provides an opaque `TidyMessage` and an API that can be used to query against it to find the juicy goodness inside. - For example, `tidyGetMessageOutput( tmessage )` will return the complete, localized message. - Another example, `tidyGetMessageLine( tmessage )` will return the line the message applies to. - You can also get information about the individual arguments that make up a message. By using the `tidyGetMessageArguments( tmessage )` itorator and `tidyGetNextMessageArgument` you will obtain an opaque `TidyMessageArgument` which has its own interrogation API. For example: - tidyGetArgType( tmessage, &iterator ); - tidyGetArgFormat( tmessage, &iterator ); - tidyGetArgValueString( tmessage, &iterator ); - …and so on. Other major changes include refactoring `messages.c` to use the new message "object" directly when emitting messages to the console or output sinks. This allowed replacement of a lot of specialized functions with generalized ones. Some of this generalizing involved modifications to the `language_xx.h` header files, and these are all positive improvements even without the above changes.
2017-03-13 17:28:57 +00:00
/**
 ** This callback from LibTidy allows the console application to examine an
 ** error message before allowing LibTidy to display it. Currently the body
 ** of the function is not compiled into Tidy, but if you're interested in
 ** how to use the new message API, then enable it. Possible applications in
 ** future console Tidy might be to do things like:
 ** - allow user-defined filtering
 ** - sort the report output by line number
 ** - other things that are user facing and best not put into LibTidy
 **   proper.
 */
static Bool TIDY_CALL reportCallback(TidyMessage tmessage)
{
#if 0
    /* Disabled demonstration of the message interrogation API. */
    TidyIterator cursor;
    TidyMessageArgument arg;
    TidyFormatParameterType kind;
    ctmbstr spec;

    printf("FILTER: %s, %s\n", tidyGetMessageKey( tmessage ), tidyGetMessageOutput( tmessage ));

    /* loop through the arguments, if any, and print their details */
    for ( cursor = tidyGetMessageArguments( tmessage ); cursor; )
    {
        arg  = tidyGetNextMessageArgument( tmessage, &cursor );
        kind = tidyGetArgType( tmessage, &arg );
        spec = tidyGetArgFormat( tmessage, &arg );
        printf( " Type = %u, Format = %s, Value = ", kind, spec );

        if ( kind == tidyFormatType_STRING )
            printf("%s\n", tidyGetArgValueString( tmessage, &arg ));
        else if ( kind == tidyFormatType_INT )
            printf("%d\n", tidyGetArgValueInt( tmessage, &arg));
        else if ( kind == tidyFormatType_UINT )
            printf("%u\n", tidyGetArgValueUInt( tmessage, &arg));
        else if ( kind == tidyFormatType_DOUBLE )
            printf("%g\n", tidyGetArgValueDouble( tmessage, &arg));
        else
            printf("%s", "unknown so far\n");
    }

    return no;  /* suppress LibTidy's own output of this message */
#else
    return yes; /* needed so Tidy will not block output of this message */
#endif
}
/**
** MAIN -- let's do something here.
*/
int main( int argc, char** argv )
{
ctmbstr prog = argv[0];
ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL;
TidyDoc tdoc = tidyCreate();
int status = 0;
tmbstr locale = NULL;
Massive Revamp of the Messaging System This is a rather large refactoring of Tidy's messaging system. This was done mostly to allow non-C libraries that cannot adequately take advantage of arg_lists a chance to query report filter information for information related to arguments used in constructing an error message. Three main goals were in mind for this project: - Don't change the contents of Tidy's existing output sinks. This will ensure that changes do no affect console Tidy users, or LibTidy users who use the output sinks directly. This was accomplished 100% other than some improved cosmetics in the output. See tidy-html5-tests repository, the `refactor` and `more_messages_changes` branches for these minor diffs. - Provide an API that is simple and also extensible without having to write new error filters all the time. This was accomplished by adding the new message callback `TidyMessageCallback` that provides callback functions an opaque object representing the message, and an API to query the message for wanted details. With this, we should never have to add a new callback routine again, as additional API can simply be written against the opaque object. - The API should work the same as the rest of LibTidy's API in that it's consistent and only uses simple types with wide interoperability with other languages. Thanks to @gagern who suggested the model for the API in #409. Although the API uses the "Tidy" way off accessing data via an iterator rather than an index, this can be easily abstracted in the target language. There are two *major* API breaking changes: - Removed TidyReportFilter2 - This was only used by one application in the entire world, and was a hacky kludge that served its purpose. TidyReportCallback (né TidyReportFilter3) is much better. If, for some reason, this affects you, I recommend using TidyReportCallback instead. It's a minor change for your application. 
- Renamed TidyReportFilter3 to TidyReportCallback - This name is much more semantic, and much more sensible in light of improved callback system. As the name implies, it remains capable of *only* receiving callbacks for Tidy "reports." Introducing TidyMessageCallback, and a new message interrogation API. - As its name implies, it is able to capture (and optionally suppress) *all* of Tidy's output, including the dialogue messages that never make it to the existing report filters. - Provides an opaque `TidyMessage` and an API that can be used to query against it to find the juicy goodness inside. - For example, `tidyGetMessageOutput( tmessage )` will return the complete, localized message. - Another example, `tidyGetMessageLine( tmessage )` will return the line the message applies to. - You can also get information about the individual arguments that make up a message. By using the `tidyGetMessageArguments( tmessage )` itorator and `tidyGetNextMessageArgument` you will obtain an opaque `TidyMessageArgument` which has its own interrogation API. For example: - tidyGetArgType( tmessage, &iterator ); - tidyGetArgFormat( tmessage, &iterator ); - tidyGetArgValueString( tmessage, &iterator ); - …and so on. Other major changes include refactoring `messages.c` to use the new message "object" directly when emitting messages to the console or output sinks. This allowed replacement of a lot of specialized functions with generalized ones. Some of this generalizing involved modifications to the `language_xx.h` header files, and these are all positive improvements even without the above changes.
2017-03-13 17:28:57 +00:00
tidySetMessageCallback( tdoc, reportCallback);
uint contentErrors = 0;
uint contentWarnings = 0;
uint accessWarnings = 0;
errout = stderr; /* initialize to stderr */
/* Set an atexit handler. */
atexit( tidy_cleanup );
/* Set the locale for tidy's output. */
locale = tidySystemLocale(locale);
tidySetLanguage(locale);
if ( locale )
free( locale );
#if defined(_WIN32)
/* Force Windows console to use UTF, otherwise many characters will
* be garbage. Note that East Asian languages *are* supported, but
* only when Windows OS locale (not console only!) is set to an
* East Asian language.
*/
win_cp = GetConsoleOutputCP();
SetConsoleOutputCP(CP_UTF8);
#endif
#if !defined(NDEBUG) && defined(_MSC_VER)
set_log_file((char *)"temptidy.txt", 0);
2016-10-25 14:41:03 +00:00
/* add_append_log(1); */
2014-08-03 18:33:29 +00:00
#endif
2011-11-17 02:44:16 +00:00
/*
* Look for default configuration files using any of
* the following possibilities:
* - TIDY_CONFIG_FILE - from tidyplatform.h, typically /etc/tidy.conf
* - HTML_TIDY - environment variable
* - TIDY_USER_CONFIG_FILE - from tidyplatform.h, typically ~/tidy.conf
*/
2011-11-17 02:44:16 +00:00
#ifdef TIDY_CONFIG_FILE
if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
{
status = tidyLoadConfig( tdoc, TIDY_CONFIG_FILE );
if ( status != 0 ) {
fprintf(errout, tidyLocalizedString( TC_MAIN_ERROR_LOAD_CONFIG ), TIDY_CONFIG_FILE, status);
fprintf(errout, "\n");
}
}
#endif /* TIDY_CONFIG_FILE */
if ( (cfgfil = getenv("HTML_TIDY")) != NULL )
{
status = tidyLoadConfig( tdoc, cfgfil );
if ( status != 0 ) {
fprintf(errout, tidyLocalizedString( TC_MAIN_ERROR_LOAD_CONFIG ), cfgfil, status);
fprintf(errout, "\n");
}
}
2011-11-17 02:44:16 +00:00
#ifdef TIDY_USER_CONFIG_FILE
else if ( tidyFileExists( tdoc, TIDY_USER_CONFIG_FILE) )
{
status = tidyLoadConfig( tdoc, TIDY_USER_CONFIG_FILE );
if ( status != 0 ) {
fprintf(errout, tidyLocalizedString( TC_MAIN_ERROR_LOAD_CONFIG ), TIDY_USER_CONFIG_FILE, status);
fprintf(errout, "\n");
}
}
2011-11-17 02:44:16 +00:00
#endif /* TIDY_USER_CONFIG_FILE */
/*
* Read command line
*/
while ( argc > 0 )
{
if (argc > 1 && argv[1][0] == '-')
{
/* support -foo and --foo */
ctmbstr arg = argv[1] + 1;
if ( strcasecmp(arg, "xml") == 0)
tidyOptSetBool( tdoc, TidyXmlTags, yes );
else if ( strcasecmp(arg, "asxml") == 0 ||
strcasecmp(arg, "asxhtml") == 0 )
{
tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
}
else if ( strcasecmp(arg, "ashtml") == 0 )
tidyOptSetBool( tdoc, TidyHtmlOut, yes );
else if ( strcasecmp(arg, "indent") == 0 )
{
tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
tidyOptResetToDefault( tdoc, TidyIndentSpaces );
}
else if ( strcasecmp(arg, "omit") == 0 )
tidyOptSetBool( tdoc, TidyOmitOptionalTags, yes );
else if ( strcasecmp(arg, "upper") == 0 )
tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
else if ( strcasecmp(arg, "clean") == 0 )
tidyOptSetBool( tdoc, TidyMakeClean, yes );
else if ( strcasecmp(arg, "gdoc") == 0 )
tidyOptSetBool( tdoc, TidyGDocClean, yes );
else if ( strcasecmp(arg, "bare") == 0 )
tidyOptSetBool( tdoc, TidyMakeBare, yes );
else if ( strcasecmp(arg, "raw") == 0 ||
strcasecmp(arg, "ascii") == 0 ||
strcasecmp(arg, "latin0") == 0 ||
strcasecmp(arg, "latin1") == 0 ||
strcasecmp(arg, "utf8") == 0 ||
2011-11-17 02:44:16 +00:00
#ifndef NO_NATIVE_ISO2022_SUPPORT
strcasecmp(arg, "iso2022") == 0 ||
2011-11-17 02:44:16 +00:00
#endif
#if SUPPORT_UTF16_ENCODINGS
strcasecmp(arg, "utf16le") == 0 ||
strcasecmp(arg, "utf16be") == 0 ||
strcasecmp(arg, "utf16") == 0 ||
2011-11-17 02:44:16 +00:00
#endif
#if SUPPORT_ASIAN_ENCODINGS
strcasecmp(arg, "shiftjis") == 0 ||
strcasecmp(arg, "big5") == 0 ||
2011-11-17 02:44:16 +00:00
#endif
strcasecmp(arg, "mac") == 0 ||
strcasecmp(arg, "win1252") == 0 ||
strcasecmp(arg, "ibm858") == 0 )
{
tidySetCharEncoding( tdoc, arg );
}
else if ( strcasecmp(arg, "numeric") == 0 )
tidyOptSetBool( tdoc, TidyNumEntities, yes );
else if ( strcasecmp(arg, "modify") == 0 ||
strcasecmp(arg, "change") == 0 || /* obsolete */
strcasecmp(arg, "update") == 0 ) /* obsolete */
{
tidyOptSetBool( tdoc, TidyWriteBack, yes );
}
else if ( strcasecmp(arg, "errors") == 0 )
tidyOptSetBool( tdoc, TidyShowMarkup, no );
else if ( strcasecmp(arg, "quiet") == 0 )
tidyOptSetBool( tdoc, TidyQuiet, yes );
/* Currenly user must specify a language
prior to anything that causes output */
else if ( strcasecmp(arg, "language") == 0 ||
strcasecmp(arg, "lang") == 0 )
if ( argc >= 3)
{
if ( strcasecmp(argv[2], "help") == 0 )
{
lang_help();
exit(0);
}
if ( !tidySetLanguage( argv[2] ) )
{
printf(tidyLocalizedString(TC_STRING_LANG_NOT_FOUND),
argv[2], tidyGetLanguage());
printf("\n");
}
--argc;
++argv;
}
else
{
printf( "%s\n", tidyLocalizedString(TC_STRING_LANG_MUST_SPECIFY));
}
else if ( strcasecmp(arg, "help") == 0 ||
strcasecmp(arg, "-help") == 0 ||
strcasecmp(arg, "h") == 0 || *arg == '?' )
{
help( prog );
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strcasecmp(arg, "xml-help") == 0)
{
xml_help( );
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strcasecmp(arg, "xml-error-strings") == 0)
{
xml_error_strings( tdoc );
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strcasecmp(arg, "xml-options-strings") == 0)
{
xml_options_strings( tdoc );
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strcasecmp(arg, "xml-strings") == 0)
{
xml_strings( );
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strcasecmp(arg, "help-config") == 0 )
{
optionhelp( tdoc );
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strcasecmp(arg, "help-option") == 0 )
{
if ( argc >= 3)
{
optionDescribe( tdoc, argv[2] );
}
else
{
printf( "%s\n", tidyLocalizedString(TC_STRING_MUST_SPECIFY));
}
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strcasecmp(arg, "xml-config") == 0 )
{
XMLoptionhelp( tdoc );
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strcasecmp(arg, "show-config") == 0 )
{
optionvalues( tdoc );
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strcasecmp(arg, "config") == 0 )
{
if ( argc >= 3 )
{
ctmbstr post;
tidyLoadConfig( tdoc, argv[2] );
/* Set new error output stream if setting changed */
post = tidyOptGetValue( tdoc, TidyErrFile );
if ( post && (!errfil || !samefile(errfil, post)) )
{
errfil = post;
errout = tidySetErrorFile( tdoc, post );
}
--argc;
++argv;
}
}
else if ( strcasecmp(arg, "output") == 0 ||
strcasecmp(arg, "-output-file") == 0 ||
strcasecmp(arg, "o") == 0 )
{
if ( argc >= 3 )
{
tidyOptSetValue( tdoc, TidyOutFile, argv[2] );
--argc;
++argv;
}
}
else if ( strcasecmp(arg, "file") == 0 ||
strcasecmp(arg, "-file") == 0 ||
strcasecmp(arg, "f") == 0 )
{
if ( argc >= 3 )
{
errfil = argv[2];
errout = tidySetErrorFile( tdoc, errfil );
--argc;
++argv;
}
}
else if ( strcasecmp(arg, "wrap") == 0 ||
strcasecmp(arg, "-wrap") == 0 ||
strcasecmp(arg, "w") == 0 )
{
if ( argc >= 3 )
{
uint wraplen = 0;
int nfields = sscanf( argv[2], "%u", &wraplen );
tidyOptSetInt( tdoc, TidyWrapLen, wraplen );
if (nfields > 0)
{
--argc;
++argv;
}
}
}
else if ( strcasecmp(arg, "version") == 0 ||
strcasecmp(arg, "-version") == 0 ||
strcasecmp(arg, "v") == 0 )
{
version();
tidyRelease( tdoc );
return 0; /* success */
}
else if ( strncmp(argv[1], "--", 2 ) == 0)
{
if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) )
{
/* Set new error output stream if setting changed */
ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile );
if ( post && (!errfil || !samefile(errfil, post)) )
{
errfil = post;
errout = tidySetErrorFile( tdoc, post );
}
++argv;
--argc;
}
}
2011-11-17 02:44:16 +00:00
#if SUPPORT_ACCESSIBILITY_CHECKS
else if ( strcasecmp(arg, "access") == 0 )
{
if ( argc >= 3 )
{
uint acclvl = 0;
int nfields = sscanf( argv[2], "%u", &acclvl );
tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl );
if (nfields > 0)
{
--argc;
++argv;
}
}
}
2011-11-17 02:44:16 +00:00
#endif
else
{
uint c;
ctmbstr s = argv[1];
while ( (c = *++s) != '\0' )
{
switch ( c )
{
case 'i':
tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
tidyOptResetToDefault( tdoc, TidyIndentSpaces );
break;
/* Usurp -o for output file. Anyone hiding end tags?
case 'o':
tidyOptSetBool( tdoc, TidyHideEndTags, yes );
break;
*/
case 'u':
tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
break;
case 'c':
tidyOptSetBool( tdoc, TidyMakeClean, yes );
break;
case 'g':
tidyOptSetBool( tdoc, TidyGDocClean, yes );
break;
case 'b':
tidyOptSetBool( tdoc, TidyMakeBare, yes );
break;
case 'n':
tidyOptSetBool( tdoc, TidyNumEntities, yes );
break;
case 'm':
tidyOptSetBool( tdoc, TidyWriteBack, yes );
break;
case 'e':
tidyOptSetBool( tdoc, TidyShowMarkup, no );
break;
case 'q':
tidyOptSetBool( tdoc, TidyQuiet, yes );
break;
default:
unknownOption( c );
break;
}
}
}
--argc;
++argv;
continue;
}
if ( argc > 1 )
{
htmlfil = argv[1];
#if (!defined(NDEBUG) && defined(_MSC_VER))
SPRTF("Tidying '%s'\n", htmlfil);
2016-10-25 14:41:03 +00:00
#endif /* DEBUG outout */
if ( tidyOptGetBool(tdoc, TidyEmacs) )
tidySetEmacsFile( tdoc, htmlfil );
status = tidyParseFile( tdoc, htmlfil );
}
else
{
htmlfil = "stdin";
status = tidyParseStdin( tdoc );
}
if ( status >= 0 )
status = tidyCleanAndRepair( tdoc );
if ( status >= 0 ) {
status = tidyRunDiagnostics( tdoc );
if ( !tidyOptGetBool(tdoc, TidyQuiet) ) {
/* NOT quiet, show DOCTYPE, if not already shown */
if (!tidyOptGetBool(tdoc, TidyShowInfo)) {
tidyOptSetBool( tdoc, TidyShowInfo, yes );
tidyReportDoctype( tdoc ); /* FIX20140913: like warnings, errors, ALWAYS report DOCTYPE */
tidyOptSetBool( tdoc, TidyShowInfo, no );
}
}
}
if ( status > 1 ) /* If errors, do we want to force output? */
status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 );
if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) )
{
if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 )
status = tidySaveFile( tdoc, htmlfil );
else
{
ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
if ( outfil ) {
status = tidySaveFile( tdoc, outfil );
} else {
2014-08-03 18:33:29 +00:00
#if !defined(NDEBUG) && defined(_MSC_VER)
static char tmp_buf[264];
sprintf(tmp_buf,"%s.html",get_log_file());
status = tidySaveFile( tdoc, tmp_buf );
SPRTF("Saved tidied content to '%s'\n",tmp_buf);
2014-08-03 18:33:29 +00:00
#else
status = tidySaveStdout( tdoc );
2014-08-03 18:33:29 +00:00
#endif
}
}
}
contentErrors += tidyErrorCount( tdoc );
contentWarnings += tidyWarningCount( tdoc );
accessWarnings += tidyAccessWarningCount( tdoc );
--argc;
++argv;
if ( argc <= 1 )
break;
} /* read command line loop */
if (!tidyOptGetBool(tdoc, TidyQuiet) &&
errout == stderr && !contentErrors)
fprintf(errout, "\n");
if (contentErrors + contentWarnings > 0 &&
!tidyOptGetBool(tdoc, TidyQuiet))
tidyErrorSummary(tdoc);
if (!tidyOptGetBool(tdoc, TidyQuiet))
tidyGeneralInfo(tdoc);
/* called to free hash tables etc. */
tidyRelease( tdoc );
/* return status can be used by scripts */
if ( contentErrors > 0 )
return 2;
if ( contentWarnings > 0 )
return 1;
/* 0 signifies all is ok */
return 0;
2011-11-17 02:44:16 +00:00
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/