Merge pull request #359 from htacg/localize_rc

HTML Tidy now can be localized.
This commit is contained in:
Jim Derry 2016-01-30 16:02:39 +08:00
commit 22998e81e8
38 changed files with 22242 additions and 3060 deletions

View file

@ -53,6 +53,14 @@ if (TIDY_CONSOLE_SHARED)
endif () endif ()
endif () endif ()
# Allow building without extra language support.
# SUPPORT_LOCALIZATIONS is a boolean cache option that defaults to ON. Its
# value is forwarded to the compiler as the preprocessor macro
# SUPPORT_LOCALIZATIONS, explicitly defined as 1 or 0 so the sources can test
# it with `#if`. Configure with -DSUPPORT_LOCALIZATIONS=OFF to build with the
# built-in language only.
option( SUPPORT_LOCALIZATIONS "Set OFF to build without additional languages." ON )
if (SUPPORT_LOCALIZATIONS)
# Additional languages requested: define the macro as 1.
add_definitions ( -DSUPPORT_LOCALIZATIONS=1 )
else ()
# Additional languages disabled: define the macro as 0 (not merely undefined).
add_definitions ( -DSUPPORT_LOCALIZATIONS=0 )
endif ()
if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_COMPILER_IS_GNUCXX)
set( WARNING_FLAGS -Wall ) set( WARNING_FLAGS -Wall )
endif(CMAKE_COMPILER_IS_GNUCXX) endif(CMAKE_COMPILER_IS_GNUCXX)
@ -124,11 +132,12 @@ set ( CFILES
${SRCDIR}/access.c ${SRCDIR}/attrs.c ${SRCDIR}/istack.c ${SRCDIR}/access.c ${SRCDIR}/attrs.c ${SRCDIR}/istack.c
${SRCDIR}/parser.c ${SRCDIR}/tags.c ${SRCDIR}/entities.c ${SRCDIR}/parser.c ${SRCDIR}/tags.c ${SRCDIR}/entities.c
${SRCDIR}/lexer.c ${SRCDIR}/pprint.c ${SRCDIR}/charsets.c ${SRCDIR}/clean.c ${SRCDIR}/lexer.c ${SRCDIR}/pprint.c ${SRCDIR}/charsets.c ${SRCDIR}/clean.c
${SRCDIR}/localize.c ${SRCDIR}/config.c ${SRCDIR}/alloc.c ${SRCDIR}/message.c ${SRCDIR}/config.c ${SRCDIR}/alloc.c
${SRCDIR}/attrask.c ${SRCDIR}/attrdict.c ${SRCDIR}/attrget.c ${SRCDIR}/attrask.c ${SRCDIR}/attrdict.c ${SRCDIR}/attrget.c
${SRCDIR}/buffio.c ${SRCDIR}/fileio.c ${SRCDIR}/streamio.c ${SRCDIR}/buffio.c ${SRCDIR}/fileio.c ${SRCDIR}/streamio.c
${SRCDIR}/tagask.c ${SRCDIR}/tmbstr.c ${SRCDIR}/utf8.c ${SRCDIR}/tagask.c ${SRCDIR}/tmbstr.c ${SRCDIR}/utf8.c
${SRCDIR}/tidylib.c ${SRCDIR}/mappedio.c ${SRCDIR}/gdoc.c ) ${SRCDIR}/tidylib.c ${SRCDIR}/mappedio.c ${SRCDIR}/gdoc.c
${SRCDIR}/language.c )
set ( HFILES set ( HFILES
${INCDIR}/tidyplatform.h ${INCDIR}/tidy.h ${INCDIR}/tidyenum.h ${INCDIR}/tidyplatform.h ${INCDIR}/tidy.h ${INCDIR}/tidyenum.h
${INCDIR}/tidybuffio.h ) ${INCDIR}/tidybuffio.h )
@ -139,7 +148,7 @@ set ( LIBHFILES
${SRCDIR}/mappedio.h ${SRCDIR}/message.h ${SRCDIR}/parser.h ${SRCDIR}/mappedio.h ${SRCDIR}/message.h ${SRCDIR}/parser.h
${SRCDIR}/pprint.h ${SRCDIR}/streamio.h ${SRCDIR}/tags.h ${SRCDIR}/pprint.h ${SRCDIR}/streamio.h ${SRCDIR}/tags.h
${SRCDIR}/tmbstr.h ${SRCDIR}/utf8.h ${SRCDIR}/tidy-int.h ${SRCDIR}/tmbstr.h ${SRCDIR}/utf8.h ${SRCDIR}/tidy-int.h
${SRCDIR}/version.h ${SRCDIR}/gdoc.h ) ${SRCDIR}/version.h ${SRCDIR}/gdoc.h ${SRCDIR}/language.h )
if (MSVC) if (MSVC)
list(APPEND CFILES ${SRCDIR}/sprtf.c) list(APPEND CFILES ${SRCDIR}/sprtf.c)
list(APPEND LIBHFILES ${SRCDIR}/sprtf.h) list(APPEND LIBHFILES ${SRCDIR}/sprtf.h)

View file

@ -14,12 +14,13 @@ If you are able to build tidy from [source](https://github.com/htacg/tidy-html5)
### What to change ### What to change
Here's some examples of things you might want to make a pull request for: Here are some examples of things you might want to make a pull request for:
- New features - New features
- Bug fixes - Bug fixes
- Inefficient blocks of code - Inefficient blocks of code
- Memory problems - Memory problems
- Language translations
If you have a more deeply-rooted problem with how the program is built or some of the stylistic decisions made in the code, it is best to [create an issue](https://github.com/htacg/tidy-html5/issues/new) before putting the effort into a pull request. The same goes for new features - it might be best to check the project's direction, existing pull requests, and currently open and closed issues first. If you have a more deeply-rooted problem with how the program is built or some of the stylistic decisions made in the code, it is best to [create an issue](https://github.com/htacg/tidy-html5/issues/new) before putting the effort into a pull request. The same goes for new features - it might be best to check the project's direction, existing pull requests, and currently open and closed issues first.

19
README/LOCALIZE.md Normal file
View file

@ -0,0 +1,19 @@
# Localize HTML Tidy
HTML Tidy is used worldwide but is not very friendly to non-English speakers.
The latest versions of HTML Tidy and `libtidy` now support other languages and
regional variations, but we need your help to make it accessible to these users
by using your knowledge of other languages to make Tidy better.
Help us translate HTML Tidy into another language and as part of our project
team you will certainly earn the admiration of fellow Tidy users worldwide.
## How to Contribute
All READMEs (including [instructions][2] on how to localize Tidy) and related
materials can be found in [localize][1].
[1]: https://github.com/htacg/tidy-html5/tree/master/localize
[2]: https://github.com/htacg/tidy-html5/blob/master/localize/README.md

File diff suppressed because it is too large Load diff

View file

@ -156,12 +156,16 @@ Below is a summary and brief description of each of the options. They are listed
<xsl:call-template name="cmdline-detail"> <xsl:call-template name="cmdline-detail">
<xsl:with-param name="category">misc</xsl:with-param> <xsl:with-param name="category">misc</xsl:with-param>
</xsl:call-template> </xsl:call-template>
.SS XML
<xsl:call-template name="cmdline-detail">
<xsl:with-param name="category">xml</xsl:with-param>
</xsl:call-template>
</xsl:template> </xsl:template>
<xsl:template name="cmdline-detail"> <xsl:template name="cmdline-detail">
<!-- <!--
For each option in one of the 3 categories/classes, provide its For each option in one of the categories/classes, provide its
1. names 1. names
2. description 2. description
3. equivalent configuration option 3. equivalent configuration option

View file

@ -645,6 +645,9 @@ typedef Bool (TIDY_CALL *TidyReportFilter)( TidyDoc tdoc, TidyReportLevel lvl,
typedef Bool (TIDY_CALL *TidyReportFilter2)( TidyDoc tdoc, TidyReportLevel lvl, typedef Bool (TIDY_CALL *TidyReportFilter2)( TidyDoc tdoc, TidyReportLevel lvl,
uint line, uint col, ctmbstr mssg, va_list args ); uint line, uint col, ctmbstr mssg, va_list args );
typedef Bool (TIDY_CALL *TidyReportFilter3)( TidyDoc tdoc, TidyReportLevel lvl,
uint line, uint col, ctmbstr code, va_list args );
/** Give Tidy a filter callback to use */ /** Give Tidy a filter callback to use */
TIDY_EXPORT Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TIDY_EXPORT Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc,
TidyReportFilter filtCallback ); TidyReportFilter filtCallback );
@ -652,6 +655,9 @@ TIDY_EXPORT Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc,
TIDY_EXPORT Bool TIDY_CALL tidySetReportFilter2( TidyDoc tdoc, TIDY_EXPORT Bool TIDY_CALL tidySetReportFilter2( TidyDoc tdoc,
TidyReportFilter2 filtCallback ); TidyReportFilter2 filtCallback );
TIDY_EXPORT Bool TIDY_CALL tidySetReportFilter3( TidyDoc tdoc,
TidyReportFilter3 filtCallback );
/** Set error sink to named file */ /** Set error sink to named file */
TIDY_EXPORT FILE* TIDY_CALL tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam ); TIDY_EXPORT FILE* TIDY_CALL tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam );
/** Set error sink to given buffer */ /** Set error sink to given buffer */

View file

@ -76,6 +76,12 @@ typedef enum
/** Option IDs Used to get/set option values. /** Option IDs Used to get/set option values.
These TidyOptionId are used throughout libtidy, and also
have associated localized strings to describe them.
Note this enum MUST start at zero due to historical design-time
decisions that make assumptions about this starting value.
*/ */
typedef enum typedef enum
{ {
@ -209,6 +215,7 @@ typedef enum
N_TIDY_OPTIONS /**< Must be last */ N_TIDY_OPTIONS /**< Must be last */
} TidyOptionId; } TidyOptionId;
/** Option data types /** Option data types
*/ */
typedef enum typedef enum
@ -267,6 +274,7 @@ typedef enum
TidySortAttrAlpha TidySortAttrAlpha
} TidyAttrSortStrategy; } TidyAttrSortStrategy;
/* I/O and Message handling interface /* I/O and Message handling interface
** **
** By default, Tidy will define, create and use ** By default, Tidy will define, create and use
@ -280,6 +288,11 @@ typedef enum
*/ */
/** Message severity level /** Message severity level
* These TidyReportLevel are used throughout libtidy, but don't
* have associated localized strings to describe them because
* TidyReportLevel is externally-facing, and changing the enum
* starting int can break existing API's for poorly-written
* applications using libtidy. See enum `TidyReportLevelKeys`.
*/ */
typedef enum typedef enum
{ {
@ -292,6 +305,22 @@ typedef enum
TidyFatal /**< Crash! */ TidyFatal /**< Crash! */
} TidyReportLevel; } TidyReportLevel;
/** Message severity level - string lookup keys
* These TidyReportLevelKeys are used throughout libtidy, and
* have associated localized strings to describe them. They
* correspond to enum `TidyReportLevel`.
*/
typedef enum
{
TidyInfoString = 600,
TidyWarningString,
TidyConfigString,
TidyAccessString,
TidyErrorString,
TidyBadDocumentString,
TidyFatalString
} TidyReportLevelKeys;
/* Document tree traversal functions /* Document tree traversal functions
*/ */

View file

@ -60,6 +60,11 @@ extern "C" {
#define SUPPORT_ACCESSIBILITY_CHECKS 1 #define SUPPORT_ACCESSIBILITY_CHECKS 1
#endif #endif
/* Enable/disable support for additional languages */
#ifndef SUPPORT_LOCALIZATIONS
#define SUPPORT_LOCALIZATIONS 1
#endif
/* Convenience defines for Mac platforms */ /* Convenience defines for Mac platforms */

8
localize/.gitignore vendored Normal file
View file

@ -0,0 +1,8 @@
# Ignore user-generated files:
Gemfile.lock
*.pot
*.po
*.h
translations/*.*
!translations/tidy.pot
!translations/*.po

4
localize/Gemfile Normal file
View file

@ -0,0 +1,4 @@
source 'https://rubygems.org'
gem 'thor'
gem 'i18n'

448
localize/README.md Normal file
View file

@ -0,0 +1,448 @@
# README.md - Localize HTML Tidy
Thank you for your interest in helping us localize HTML Tidy and LibTidy. Users
throughout the world will thank you.
This document describes Tidy's localization philosophy and instructs you on how
you can use standard `gettext` tools to generate language and region
localizations that will work with Tidy. Optional instructions are also included
in the event that you want to build Tidy with your new language.
## Contents:
- [Introduction](#introduction)
- [PO and POT files](#po-and-pot-files)
- [H files](#h-files)
- [Differences for translators](#differences-for-translators)
- [`poconvert.rb` versus `gettext`' tools](#poconvertrb-versus-gettext-tools)
- [How to Contribute](#how-to-contribute)
- [Find or Create the Translation Files](#find-or-create-the-translation-files)
- [Issue a Pull Request to HTML Tidy](#issue-a-pull-request-to-html-tidy)
- [Using Git appropriately](#using-git-appropriately)
- [Repository Notes](#repository-notes)
- [Adding Languages to Tidy](#adding-languages-to-tidy)
- [Best Practices](#best-practices)
- [Language Inheritance](#language-inheritance)
- [String Inheritance](#string-inheritance)
- [Base Language First and Regional Variants](#base-language-first-and-regional-variants)
- [Positional Parameters](#positional-parameters)
- [Testing](#testing)
- [Command line option](#command-line-option)
- [Changing your locale](#changing-your-locale)
- [East Asian Languages](#east-asian-languages)
- [gettext](#gettext)
- [poconvert.rb](#poconvertrb)
- [Create a new POT file](#create-a-new-pot-file)
- [Create a new POT file with non-English `msgid` strings](#create-a-new-pot-file-with-non-english-msgid-strings)
- [Convert an existing H to PO](#convert-an-existing-h-to-po)
- [Convert an existing H to PO using a different `msgid` language](#convert-an-existing-h-to-po-using-a-different-msgid-language)
- [Create a blank PO file for a particular region](#create-a-blank-po-file-for-a-particular-region)
- [Create a Tidy Language Header H file](#create-a-tidy-language-header-h-file)
- [Prepare your non-English PO for a PR](#prepare-your-non-english-po-for-a-pr)
- [Update your PO to match the new POT](#update-your-po-to-match-the-new-pot)
- [Help Tidy Get Better](#help-tidy-get-better)
## Introduction
HTML Tidy is built around the localization file `language_en.h`; without this
file HTML Tidy will not work. As such _all_ language localization work
originates from this single file.
Language localizations use header files that are identical to `language_en.h`,
except that they have different strings. For the convenience of language
translators, though, Tidy source code includes a Ruby `poconvert.rb` script
that enables _optional_ gettext PO/POT work streams that may be more comfortable
to them.
### PO and POT files
HTML Tidy provides PO and POT files for language translations. The file
`tidy.pot` is the correct template to use as a basis for new translations. In a
typical `gettext` workflow a translator will use the `tidy.pot` file to create a
language translation PO file that contains original English strings and the
translated strings.
If a language has already been translated (or if the translation has begun) then
PO files may already exist. These files are named `language_ll.po` or
`language_ll_CC.po`, where `ll` represents the language code, and optionally,
`CC` represents the region code of the translation.
Tidy does not use MO files that `gettext` tools generate from PO files.
Please note that these PO and POT files are provided for translator convenience
only. Tidy's [header files](#h-files) constitute the true, controlled source
code for Tidy.
### H files
Tidy does not use `gettext` to display strings and so `gettext`-generated MO
files are not necessary. Instead translated PO files must be converted to Tidy's
language header H file format. Translators are not required to perform this
step, but we provide a tool to perform this function if desired.
### Differences for translators
Experienced users and translators of PO files may note that we use the PO file's
`msgctxt` field a bit uniquely. Rather than point to a line in the source code,
it contains a reference to the string's identifier. Because the PO format does
not allow for arbitrary metadata this is a requirement for generating our
header files.
If you're the type of translator that does dig into the source code, then this
`msgctxt` symbol is still useful to you and adds a single extra step to finding
where a string is in context: a symbol or string search using the `msgctxt`
value will reveal the context in source code.
Finally the `msgid` field is a throwaway; Tidy's language tools do not use this
value and so it's only for the translator's convenience. This fact makes it
convenient for translators to translate from languages other than English,
which is fully supported by our tools.
### `poconvert.rb` versus `gettext`' tools
Please don't use `gettext`'s tools with our PO and POT files (unless you are
using our strings for a different project). Instead all workflows can be
accomplished with our `poconvert.rb` tool.
[More information about this tool](#poconvertrb) can be found below.
## How to Contribute
### Find or Create the Translation Files
If you've not already cloned the HTML Tidy source code repository that will be
your first step.
In the `localize\translations\` directory you can find existing languages, e.g.,
- `tidy.pot` (Tidy's POT template for translations).
- `language_en_gb.po` (British English variants for the built in language)
- …and perhaps more.
In the `src\` directory you can find the master files for existing languages,
e.g.,
- `language_en.h` (Tidy's native, built-in language, mostly U.S. English)
- `language_en_gb.h` (British English variants for the built in language)
- …and perhaps more.
Although the header files are the master files for HTML Tidy, we understand that
not all potential translators want to edit C files directly. Therefore as an
option, the following workflow to use POT and PO files is offered.
If the language that you want to work on is already present:
- Simply open the file in your favorite PO editor and then get to work.
- Note that although you can use a text editor, we recommend that you use a
dedicated PO editor so that you don't accidentally make the file illegible
to our conversion utility.
If the language that you want to work on is _not_ already present:
- You can open `tidy.pot` in your favorite PO editor and use its functions
to begin a new translation into your desired language.
- Note that although you can use a text editor, we recommend that you use a
dedicated PO editor so that you don't accidentally make the file illegible
to our conversion utility.
- To perform the work manually:
- Copy `tidy.pot` to `language_ll.po` (for a non-regional variant, or base
language), or to `language_ll_cc.po` (for a region-specific variant),
where `ll` indicates the two letter language code and `cc` indicates the
two letter region or country code.
- Change the pertinent PO header section accordingly.
- Use `poconvert.rb` to generate a PO:
- `poconvert.rb msginit --locale ll`, where `ll` indicates the language
code for the language you want to translate to. The tool recognizes the
same languages as `gettext`' `msginit`. If your chosen language is not
supported, then please see the manual method, above.
- See also `poconvert.rb help` for more options.
- See GNU's [The Format of PO Files](https://www.gnu.org/software/gettext/manual/html_node/PO-Files.html)
for more specific instructions and important information.
### Issue a Pull Request to HTML Tidy
Once your translation is complete commit your entire HTML Tidy repository to
GitHub and issue a pull request (PR) against the `master` branch. If accepted a
friendly developer will convert your PO into a format useful to Tidy if your
PR is a PO, or will simply merge your changed header file if you changed it
directly.
You are also welcome to perform any conversions yourself, add new languages to
Tidy, and issue a PR for the whole change.
### Using Git appropriately
1. Fork the repository to your GitHub account.
2. Optionally create a **topical branch** - a branch whose name is succinct but
explains what you're doing, such as "localize Portuguese".
3. Make your changes, committing at logical breaks.
4. Push your work to your personal account.
5. [Create a pull request](https://help.github.com/articles/using-pull-requests).
6. Watch for comments or acceptance.
### Repository Notes
If you are working with PO files then please **only** commit PO files with
_English_ `msgid` fields. The `gettext` convention specifies only English
`msgid`, and other translators may not understand the original strings.
Our `poconvert.rb` script can generate PO files using another language as
`msgid`. This can be very useful if it's easier for you to translate from
another language instead of English. It can also be useful for translating from
a base language to a regional variant, such as from Spanish to Mexican Spanish.
If you choose to work locally with a non-English PO, you can easily convert
your PO to a Tidy header file and back to an English-based PO using our
`poconvert.rb` script. See its documentation (`poconvert.rb help`) for
instructions.
## Adding Languages to Tidy
Although we don't require you to follow these steps to contribute a language
to Tidy, you may want to add the language to Tidy yourself to test the
translation, or to save one of the developer team a few extra steps.
- Generate the header files if necessary:
- Convert your PO file to a Tidy header file by executing
`poconvert.rb msgfmt <path_to_your_file.po>`. Note that on Windows you
will likely have to preface this line with `ruby`.
- The tool should generate a file named `language_ll_cc.h` in the working
directory, where `ll_cc` will be replaced with the language/region of your
translation.
- Copy this `.h` file into `src\`.
- Modify Tidy's source:
- Edit the file `src\language.c` to ensure that the new `.h` file you added
is in the `#include` section.
- Look for the `static tidyLanguagesType tidyLanguages` structure starting
near line 40, and look for the comment `These languages are installed.`.
You can add your new language to the list along with the other languages
present, following the same format.
- Build Tidy:
- Build Tidy per the usual instructions, and try it out using the `-lang`
option.
## Best Practices
### Language Inheritance
HTML Tidy will fall back from the specified language to the base language and
then finally to the default English as required. This means, for example, a
programmer might set `libtidy` to use “es_mx”, and if it doesn't exist Tidy
will automatically use “es”. If that doesn't exist `libtidy` will continue to
use whatever language it is currently using.
### String Inheritance
HTML Tidy will also fall back for individual strings. For example if `libtidy`
is set to use “es_mx” and a particular string is requested and not found, the
library will look for the string in “es”. If the string is not found there then
the “en” string will be given.
### Base Language First and Regional Variants
Because of this inheritance we hope to localize base languages first, as the
only strings then required for regional variants are the strings that differ.
This will help us keep HTML Tidy and `libtidy` small.
If you are working on a regional variation (such as “en_CA”) please only
localize strings that are actually _different_ from the base language!
### Positional Parameters
Please note that HTML Tidy does not currently support positional parameters. Due
to the nature of most of Tidy's output, it's not expected that they will be
required. In any case, please translate strings so that substitution values are
in the same order as the original string.
## Testing
We hope to develop a comprehensive test suite in the future, but in the meantime
you can test localized output like this.
### Command line option
Use the `-lang`/`-language` option and specify a POSIX or Windows language name.
This option should be the first option used because the console application parses
and acts on options first-in, first-out.
### Changing your locale
On Unix/Mac and Linux operating systems you can change your shell's locale
temporarily with:
`export LANG=en_GB`
`export LC_ALL=en_GB`
…substituting, of course, the language of your choice.
### East Asian Languages
East Asian languages are completely supported and have been tested on Linux,
Mac OS X, and Windows, although Windows requires you to set your operating
system (not the console locale!) to an East Asian locale to enable this in
Windows Console and PowerShell. Note that PowerShell ISE always supports East
Asian languages without requiring you to change your operating system locale.
## gettext
Although HTML Tidy uses `gettext`-compatible tools and PO files for language
localization, Tidy itself does _not_ use `gettext`. Tidy's build philosophy is
build it anywhere and build it with anything. As `gettext` is not universally
available on every platform under the sun, Tidy cannot count on `gettext`.
Instead Tidy builds all translations into its library (and command line
executable if built monolithically), and can run on virtually any general
purpose computer with any operating system.
While this does not pose a significant problem for storage or execution space
on modern PC's, we understand that certain applications may still be space
critical. As such it's trivial to build Tidy without this extra language
support using the `-DSUPPORT_LOCALIZATIONS=NO` switch.
## poconvert.rb
Tidy's source code includes a Ruby batch file called `poconvert.rb` which can be
used to generate POT, PO, and H files, and convert them back and forth. It has
been designed to work in a similar fashion as `gettext`'s tools, and includes
conveniences that let translators work in different source languages. Please
use `poconvert.rb help` for complete information (`ruby poconvert.rb help` on
Windows).
Note that you must install Ruby on your system, as well as install the required
dependencies. These can be manually installed with `[sudo] gem install xxx`,
where `xxx` represents the packages listed in `Gemfile`. For convenience, if you
have the Bundler gem installed, you can `bundle install` for automated
dependency installation.
Also take note of these two important characteristics:
- `poconvert.rb` is currently dependent on its current path. You can move it
from its current location, but you will have to change the values of the
`@@default_en` and `@@header_template` variables within the script.
- All files will be output in the current working directory. This will prevent
accidental overwrites of important files while we all get used to the
workflows.
Below are some sample workflows.
### Create a new POT file
Although we provide `tidy.pot` in the source, you can generate your own.
`./poconvert.rb xgettext`
This will put a fresh, new copy of `tidy.pot` in the working directory.
### Create a new POT file with non-English `msgid` strings
Although `gettext` officially recognizes English as the one, true source
language for PO and POT files, if you're more comfortable translating from a
non-English language, we can support you.
`./poconvert.rb xgettext <language_cc_ll.h>`
Where `<language_cc_ll.h>` is the path to an existing Tidy language header file.
This will produce a `tidy.pot` using the translated strings as `msgid`, using
English as a backup when translated strings are not present.
This can be valuable in producing regional variant translations, e.g., when
translating from `es` to `es_mx`.
### Convert an existing H to PO
In many cases you may want to have a fresh PO generated from a Tidy H file.
This can be accomplished with:
`./poconvert.rb msgunfmt <language_cc_ll.h>`
### Convert an existing H to PO using a different `msgid` language
If you want to generate a fresh PO file from a Tidy H file, but _also_ want to
have untranslated strings from a language other than English, try:
`./poconvert.rb msgunfmt <language_cc_ll.h> --baselang=<other-language_cc_ll.h>`
### Create a blank PO file for a particular region
`./poconvert.rb msginit`
or
`./poconvert.rb msginit --locale=LOCALE`
The first example will try to guess your current region, and the second will
use a region specified.
Tidy only knows about the same regions that `gettext` knows; if our `msginit`
does not recognize the region you specify, you will have to create a new PO
and modify the region settings yourself.
To create the blank PO using `msgid` strings from a different Tidy language,
you can use:
`./poconvert.rb msginit <language_cc_ll.h> [--locale=LOCALE]`
### Create a Tidy Language Header H file
When you're ready to include the language in Tidy, you can generate its header
file with:
`./poconvert.rb msgfmt <language_cc_ll.po>`
In the event you are creating a regional variant of a language, it's an
excellent idea to have Tidy exclude strings that are already present in the
parent language in order to reduce library and executable size. For example
if `es` already includes the string "archivo" there is no reason for your
translation to `es_mx` to include it, too. You can tell `poconvert.rb` to
exclude strings matching another localization like so:
`./poconvert.rb msgfmt <language_cc_ll.po> --baselang=<other-language_cc_ll.h>`
### Prepare your non-English PO for a PR
Although we have provided tools to allow you to work in languages other than
English, we can only accept PO's in the repository that have English `msgid`
fields. It's easy to convert your PO back to English:
`./poconvert.rb msgfmt <language_cc_ll.po>`
`./poconvert.rb msgunfmt <language_cc_ll.h>`
The first command converts your non-standard PO into a Tidy Language Header
file, and the second will create a fresh, new PO file from the header that
you've just created.
### Update your PO to match the new POT
If Tidy's POT changes, e.g., new strings are added, new comments, etc., the
simplest way to update your PO is to convert it to a header (which normalizes
it to the latest Tidy standard), and then convert the header to a new PO again.
`./poconvert.rb msgfmt <language_cc_ll.po>`
`./poconvert.rb msgunfmt <language_cc_ll.h>`
## Help Tidy Get Better
It goes without saying **all help is appreciated**. We need to work together to
make Tidy better!

View file

@ -0,0 +1,70 @@
#ifndef language_<%= po_content.language %>_h
#define language_<%= po_content.language %>_h
/*
* language_<%= po_content.language %>.h
* Localization support for HTML Tidy.
*
*
* This file is a localization file for HTML Tidy. It will have been machine
* generated or created and/or edited by hand. Both are valid options, but
* please help keep our localization efforts simple to maintain by maintaining
* the structure of this file, and changing the check box below if you make
* changes (so others know the file origin):
*
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the
* language (and maybe region) "<%= po_content.language %>". The source of
* these strings is a gettext PO file in Tidy's source, probably called
* "language_<%= po_content.language %>.po".
*
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel to edit this file
* directly (and check this box). If you prefer to edit PO files then use
* `poconvert.rb msgunfmt language_<%= po_content.language %>.h` (our own
* conversion tool) to generate a fresh PO from this file first!
*
* (c) 2015 HTACG
* See tidy.h and access.h for the copyright notice.
*
* Template Created by Jim Derry on 01/14/2016.
*
* Orginating PO file metadata:
* PO_LAST_TRANSLATOR=<%= po_content.last_translator %>
* PO_REVISION_DATE=<%= po_content.po_revision_date %>
*/
#ifdef _MSC_VER
#pragma execution_character_set("utf-8")
#endif
#include "language.h"
#include "access.h"
#include "message.h"
/**
* This language-specific function returns the correct pluralForm
* to use given n items, and is used as a member of each language
* definition.
*/
static uint whichPluralForm_<%= po_content.language %>(uint n) {
/* <%= po_content.plural_forms %> */
return <%= po_content.plural_formula %>
}
/**
* This structure specifies all of the strings needed by Tidy for a
* single language. Static definition in a header file makes it
* easy to include and exclude languages without tinkering with
* the build system.
*/
static languageDefinition language_<%= po_content.language %> = { whichPluralForm_<%= po_content.language %>, {
/***************************************
** This MUST be present and first.
** Specify the code for this language.
***************************************/
<%= report_body %>
<%= report_body_last %>
}};
#endif /* language_<%= po_content.language %>_h */

1140
localize/poconvert.rb Executable file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -32,6 +32,7 @@
#include "forward.h" #include "forward.h"
#include "message.h"
#if SUPPORT_ACCESSIBILITY_CHECKS #if SUPPORT_ACCESSIBILITY_CHECKS
@ -114,10 +115,17 @@ struct _TidyAccessImpl
Offset accessibility error codes by FIRST_ACCESS_ERR to avoid conflict with Offset accessibility error codes by FIRST_ACCESS_ERR to avoid conflict with
other error codes defined in message.h and used in localize.c. other error codes defined in message.h and used in localize.c.
*/
enum accessErrorCodes These accessErrorCodes are used throughout libtidy, and also
have associated localized strings to describe them.
IMPORTANT: to maintain compatibility with TidyMessageFilter3, if you add
or remove keys from this enum, ALSO add/remove the corresponding key
in language.c:tidyErrorFilterKeysStruct[]!
*/
typedef enum
{ {
FIRST_ACCESS_ERR = 1000, /* must be first */ FIRST_ACCESS_ERR = CODES_TIDY_ERROR_LAST + 1, /* must be first */
/* [1.1.1.1] */ IMG_MISSING_ALT, /* [1.1.1.1] */ IMG_MISSING_ALT,
/* [1.1.1.2] */ IMG_ALT_SUSPICIOUS_FILENAME, /* [1.1.1.2] */ IMG_ALT_SUSPICIOUS_FILENAME,
@ -252,7 +260,7 @@ enum accessErrorCodes
/* [13.10.1.1] */ SKIPOVER_ASCII_ART, /* [13.10.1.1] */ SKIPOVER_ASCII_ART,
LAST_ACCESS_ERR /* must be last */ LAST_ACCESS_ERR /* must be last */
}; } accessErrorCodes;
void TY_(AccessibilityHelloMessage)( TidyDocImpl* doc ); void TY_(AccessibilityHelloMessage)( TidyDocImpl* doc );

View file

@ -62,11 +62,10 @@ typedef struct _tidy_config
} TidyConfigImpl; } TidyConfigImpl;
/* Used to build a table of documentation cross-references. */
typedef struct { typedef struct {
TidyOptionId opt; /**< Identifier. */ TidyOptionId opt; /**< Identifier. */
ctmbstr doc; /**< HTML text */ TidyOptionId const *links; /**< Cross references. Last element must be 'TidyUnknownOption'. */
TidyOptionId const *links; /**< Cross references.
Last element must be 'TidyUnknownOption'. */
} TidyOptionDoc; } TidyOptionDoc;

906
src/language.c Normal file
View file

@ -0,0 +1,906 @@
/*
* language.c
* Localization support for HTML Tidy.
*
* (c) 2015 HTACG
* See tidy.h and access.h for the copyright notice.
*
* Created by Jim Derry on 11/28/15.
*/
#include "language.h"
#include "language_en.h"
#if SUPPORT_LOCALIZATIONS
#include "language_en_gb.h"
#include "language_es.h"
#include "language_es_mx.h"
#include "language_zh_cn.h"
#endif
#include "tmbstr.h"
#include "locale.h"
/**
* This structure type provides universal access to all of Tidy's strings.
* It records the language to consult first, the language to consult when
* the first one has no matching string, and the list of every language
* compiled into this build.
*/
typedef struct {
/* Language consulted first by tidyLocalizedStringN(). */
languageDefinition *currentLanguage;
/* Language consulted when `currentLanguage` yields no string.
tidySetLanguage() may set this to NULL. */
languageDefinition *fallbackLanguage;
/* List of installed languages, terminated by a NULL pointer
(declared as a flexible array member). */
languageDefinition *languages[];
} tidyLanguagesType;
/**
* This single structure contains all localizations. Note that we preset
* `.currentLanguage` to language_en, which is Tidy's default language.
* The fallback language is preset to language_en as well.
*
* The additional languages are only listed when SUPPORT_LOCALIZATIONS is
* non-zero; language_en is always present and always first.
*
* NOTE(review): statically initializing a flexible array member
* (`languages[]`) appears to rely on a compiler extension rather than
* ISO C - confirm that every supported compiler accepts it.
*/
static tidyLanguagesType tidyLanguages = {
&language_en, /* current language */
&language_en, /* first fallback language */
{
/* Required localization! */
&language_en,
#if SUPPORT_LOCALIZATIONS
/* These additional languages are installed. */
&language_en_gb,
&language_es,
&language_es_mx,
&language_zh_cn,
#endif
NULL /* This array MUST be null terminated. */
}
};
/**
 *  This structure maps old-fashioned Windows strings
 *  to proper POSIX names (modern Windows already uses
 *  POSIX names).
 *
 *  Lookups (see tidyNormalizedLocaleName()) compare the lower-cased
 *  request against `winName` from the top of the table and stop at the
 *  first match, so when a `winName` appears twice only the first entry
 *  is ever used. Some POSIX names here use '-' as the separator; the
 *  normalizer rewrites the third character of five-character codes to
 *  '_', so both spellings produce the same result.
 *
 *  The table MUST end with the { NULL, NULL } sentinel.
 */
static const tidyLocaleMapItem localeMappings[] = {
    { "america",                "en_us" },
    { "american english",       "en_us" },
    { "american-english",       "en_us" },
    { "american",               "en_us" },
    { "aus",                    "en_au" },
    { "australia",              "en_au" },
    { "australian",             "en_au" },
    { "austria",                "de_at" },
    { "aut",                    "de_at" },
    { "bel",                    "nl_be" },
    { "belgian",                "nl_be" },
    { "belgium",                "nl_be" },
    { "bra",                    "pt-br" },
    { "brazil",                 "pt-br" },
    { "britain",                "en_gb" },
    { "can",                    "en_ca" },
    { "canada",                 "en_ca" },
    { "canadian",               "en_ca" },
    { "che",                    "de_ch" },
    { "china",                  "zh_cn" },
    { "chinese-simplified",     "zh" },
    { "chinese-traditional",    "zh_tw" },
    { "chinese",                "zh" },
    { "chn",                    "zh_cn" },
    { "chs",                    "zh" },
    { "cht",                    "zh_tw" },
    { "csy",                    "cs" },
    { "cze",                    "cs_cz" },
    { "czech",                  "cs_cz" },
    { "dan",                    "da" },
    { "danish",                 "da" },
    { "dea",                    "de_at" },
    { "denmark",                "da_dk" },
    { "des",                    "de_ch" },
    { "deu",                    "de" },
    { "dnk",                    "da_dk" },
    { "dutch-belgian",          "nl_be" },
    { "dutch",                  "nl" },
    { "ell",                    "el" },
    { "ena",                    "en_au" },
    { "enc",                    "en_ca" },
    { "eng",                    "en_gb" },  /* was "eb_gb", which matches no language */
    { "england",                "en_gb" },
    { "english-american",       "en_us" },
    { "english-aus",            "en_au" },
    { "english-can",            "en_ca" },
    { "english-nz",             "en_nz" },
    { "english-uk",             "en_gb" },  /* was "eb_gb", which matches no language */
    { "english-us",             "en_us" },
    { "english-usa",            "en_us" },
    { "english",                "en" },
    { "enu",                    "en_us" },
    { "enz",                    "en_nz" },
    { "esm",                    "es-mx" },
    { "esn",                    "es" },
    { "esp",                    "es" },
    { "fin",                    "fi" },
    { "finland",                "fi_fi" },
    { "finnish",                "fi" },
    { "fra",                    "fr" },
    { "france",                 "fr_fr" },
    { "frb",                    "fr_be" },
    { "frc",                    "fr_ca" },
    { "french-belgian",         "fr_be" },
    { "french-canadian",        "fr_ca" },
    { "french-swiss",           "fr_ch" },
    { "french",                 "fr" },
    { "frs",                    "fr_ch" },
    { "gbr",                    "en_gb" },
    { "german-austrian",        "de_at" },
    { "german-swiss",           "de_ch" },
    { "german",                 "de" },
    { "germany",                "de_de" },
    { "grc",                    "el_gr" },
    { "great britain",          "en_gb" },
    { "greece",                 "el_gr" },
    { "greek",                  "el" },
    { "hkg",                    "zh_hk" },
    { "holland",                "nl_nl" },
    { "hong kong",              "zh_hk" },
    { "hong-kong",              "zh_hk" },
    { "hun",                    "hu" },
    { "hungarian",              "hu" },
    { "hungary",                "hu_hu" },
    { "iceland",                "is_is" },
    { "icelandic",              "is" },
    { "ireland",                "en_ie" },
    { "irl",                    "en_ie" },
    { "isl",                    "is" },
    { "ita",                    "it" },
    { "ita",                    "it_it" },  /* NOTE: unreachable, the "ita" entry above matches first */
    { "italian-swiss",          "it_ch" },
    { "italian",                "it" },
    { "italy",                  "it_it" },
    { "its",                    "it_ch" },
    { "japan",                  "ja_jp" },
    { "japanese",               "ja" },
    { "jpn",                    "ja" },
    { "kor",                    "ko" },
    { "korea",                  "ko_kr" },
    { "korean",                 "ko" },
    { "mex",                    "es-mx" },
    { "mexico",                 "es-mx" },
    { "netherlands",            "nl_nl" },
    { "new zealand",            "en_nz" },
    { "new-zealand",            "en_nz" },
    { "nlb",                    "nl_be" },
    { "nld",                    "nl" },
    { "non",                    "nn" },
    { "nor",                    "nb" },
    { "norway",                 "no" },
    { "norwegian-bokmal",       "nb" },
    { "norwegian-nynorsk",      "nn" },
    { "norwegian",              "no" },
    { "nz",                     "en_nz" },
    { "nzl",                    "en_nz" },
    { "plk",                    "pl" },
    { "pol",                    "pl-pl" },
    { "poland",                 "pl-pl" },
    { "polish",                 "pl" },
    { "portugal",               "pt-pt" },
    { "portuguese-brazil",      "pt-br" },
    { "portuguese",             "pt" },
    { "pr china",               "zh_cn" },
    { "pr-china",               "zh_cn" },
    { "prt",                    "pt-pt" },
    { "ptb",                    "pt-br" },
    { "ptg",                    "pt" },
    { "rus",                    "ru" },
    { "russia",                 "ru-ru" },
    { "russian",                "ru" },
    { "sgp",                    "zh_sg" },
    { "singapore",              "zh_sg" },
    { "sky",                    "sk" },
    { "slovak",                 "sk" },
    { "spain",                  "es-es" },
    { "spanish-mexican",        "es-mx" },
    { "spanish-modern",         "es" },
    { "spanish",                "es" },
    { "sve",                    "sv" },
    { "svk",                    "sk-sk" },
    { "swe",                    "sv-se" },
    { "sweden",                 "sv-se" },
    { "swedish",                "sv" },
    { "swiss",                  "de_ch" },
    { "switzerland",            "de_ch" },
    { "taiwan",                 "zh_tw" },
    { "trk",                    "tr" },
    { "tur",                    "tr-tr" },
    { "turkey",                 "tr-tr" },
    { "turkish",                "tr" },
    { "twn",                    "zh_tw" },
    { "uk",                     "en_gb" },
    { "united kingdom",         "en_gb" },
    { "united states",          "en_us" },
    { "united-kingdom",         "en_gb" },
    { "united-states",          "en_us" },
    { "us",                     "en_us" },
    { "usa",                    "en_us" },

    /* MUST be last. */
    { NULL,                     NULL }
};
/**
* Provides the mapping for LibTidy users to map between an opaque key
* and an error message value. See `tidyErrorFilterKeysStruct[]`, below.
*/
typedef struct tidyErrorFilterKeyItem {
/* String key naming the error code. */
ctmbstr key;
/* Numeric value of the error code the key stands for. */
int value;
} tidyErrorFilterKeyItem;
/**
* LibTidy users may want to use `TidyReportFilter3` to enable their own
* localization lookup features. Because Tidy's error codes are enum values,
* their specific numeric values can change over time. This table ensures
* that LibTidy users always have a static string key available for each
* error code.
*/
static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "CODES_TIDY_ERROR_FIRST", CODES_TIDY_ERROR_FIRST },
{ "MISSING_SEMICOLON", MISSING_SEMICOLON },
{ "MISSING_SEMICOLON_NCR", MISSING_SEMICOLON_NCR },
{ "UNKNOWN_ENTITY", UNKNOWN_ENTITY },
{ "UNESCAPED_AMPERSAND", UNESCAPED_AMPERSAND },
{ "APOS_UNDEFINED", APOS_UNDEFINED },
{ "MISSING_ENDTAG_FOR", MISSING_ENDTAG_FOR },
{ "MISSING_ENDTAG_BEFORE", MISSING_ENDTAG_BEFORE },
{ "DISCARDING_UNEXPECTED", DISCARDING_UNEXPECTED },
{ "NESTED_EMPHASIS", NESTED_EMPHASIS },
{ "NON_MATCHING_ENDTAG", NON_MATCHING_ENDTAG },
{ "TAG_NOT_ALLOWED_IN", TAG_NOT_ALLOWED_IN },
{ "MISSING_STARTTAG", MISSING_STARTTAG },
{ "UNEXPECTED_ENDTAG", UNEXPECTED_ENDTAG },
{ "USING_BR_INPLACE_OF", USING_BR_INPLACE_OF },
{ "INSERTING_TAG", INSERTING_TAG },
{ "SUSPECTED_MISSING_QUOTE", SUSPECTED_MISSING_QUOTE },
{ "MISSING_TITLE_ELEMENT", MISSING_TITLE_ELEMENT },
{ "DUPLICATE_FRAMESET", DUPLICATE_FRAMESET },
{ "CANT_BE_NESTED", CANT_BE_NESTED },
{ "OBSOLETE_ELEMENT", OBSOLETE_ELEMENT },
{ "PROPRIETARY_ELEMENT", PROPRIETARY_ELEMENT },
{ "UNKNOWN_ELEMENT", UNKNOWN_ELEMENT },
{ "TRIM_EMPTY_ELEMENT", TRIM_EMPTY_ELEMENT },
{ "COERCE_TO_ENDTAG", COERCE_TO_ENDTAG },
{ "ILLEGAL_NESTING", ILLEGAL_NESTING },
{ "NOFRAMES_CONTENT", NOFRAMES_CONTENT },
{ "CONTENT_AFTER_BODY", CONTENT_AFTER_BODY },
{ "INCONSISTENT_VERSION", INCONSISTENT_VERSION },
{ "MALFORMED_COMMENT", MALFORMED_COMMENT },
{ "BAD_COMMENT_CHARS", BAD_COMMENT_CHARS },
{ "BAD_XML_COMMENT", BAD_XML_COMMENT },
{ "BAD_CDATA_CONTENT", BAD_CDATA_CONTENT },
{ "INCONSISTENT_NAMESPACE", INCONSISTENT_NAMESPACE },
{ "DOCTYPE_AFTER_TAGS", DOCTYPE_AFTER_TAGS },
{ "MALFORMED_DOCTYPE", MALFORMED_DOCTYPE },
{ "UNEXPECTED_END_OF_FILE", UNEXPECTED_END_OF_FILE },
{ "DTYPE_NOT_UPPER_CASE", DTYPE_NOT_UPPER_CASE },
{ "TOO_MANY_ELEMENTS", TOO_MANY_ELEMENTS },
{ "UNESCAPED_ELEMENT", UNESCAPED_ELEMENT },
{ "NESTED_QUOTATION", NESTED_QUOTATION },
{ "ELEMENT_NOT_EMPTY", ELEMENT_NOT_EMPTY },
{ "ENCODING_IO_CONFLICT", ENCODING_IO_CONFLICT },
{ "MIXED_CONTENT_IN_BLOCK", MIXED_CONTENT_IN_BLOCK },
{ "MISSING_DOCTYPE", MISSING_DOCTYPE },
{ "SPACE_PRECEDING_XMLDECL", SPACE_PRECEDING_XMLDECL },
{ "TOO_MANY_ELEMENTS_IN", TOO_MANY_ELEMENTS_IN },
{ "UNEXPECTED_ENDTAG_IN", UNEXPECTED_ENDTAG_IN },
{ "REPLACING_ELEMENT", REPLACING_ELEMENT },
{ "REPLACING_UNEX_ELEMENT", REPLACING_UNEX_ELEMENT },
{ "COERCE_TO_ENDTAG_WARN", COERCE_TO_ENDTAG_WARN },
{ "UNKNOWN_ATTRIBUTE", UNKNOWN_ATTRIBUTE },
{ "INSERTING_ATTRIBUTE", INSERTING_ATTRIBUTE },
{ "INSERTING_AUTO_ATTRIBUTE", INSERTING_AUTO_ATTRIBUTE },
{ "MISSING_ATTR_VALUE", MISSING_ATTR_VALUE },
{ "BAD_ATTRIBUTE_VALUE", BAD_ATTRIBUTE_VALUE },
{ "UNEXPECTED_GT", UNEXPECTED_GT },
{ "PROPRIETARY_ATTRIBUTE", PROPRIETARY_ATTRIBUTE },
{ "PROPRIETARY_ATTR_VALUE", PROPRIETARY_ATTR_VALUE },
{ "REPEATED_ATTRIBUTE", REPEATED_ATTRIBUTE },
{ "MISSING_IMAGEMAP", MISSING_IMAGEMAP },
{ "XML_ATTRIBUTE_VALUE", XML_ATTRIBUTE_VALUE },
{ "UNEXPECTED_QUOTEMARK", UNEXPECTED_QUOTEMARK },
{ "MISSING_QUOTEMARK", MISSING_QUOTEMARK },
{ "ID_NAME_MISMATCH", ID_NAME_MISMATCH },
{ "BACKSLASH_IN_URI", BACKSLASH_IN_URI },
{ "FIXED_BACKSLASH", FIXED_BACKSLASH },
{ "ILLEGAL_URI_REFERENCE", ILLEGAL_URI_REFERENCE },
{ "ESCAPED_ILLEGAL_URI", ESCAPED_ILLEGAL_URI },
{ "NEWLINE_IN_URI", NEWLINE_IN_URI },
{ "ANCHOR_NOT_UNIQUE", ANCHOR_NOT_UNIQUE },
{ "JOINING_ATTRIBUTE", JOINING_ATTRIBUTE },
{ "UNEXPECTED_EQUALSIGN", UNEXPECTED_EQUALSIGN },
{ "ATTR_VALUE_NOT_LCASE", ATTR_VALUE_NOT_LCASE },
{ "XML_ID_SYNTAX", XML_ID_SYNTAX },
{ "INVALID_ATTRIBUTE", INVALID_ATTRIBUTE },
{ "BAD_ATTRIBUTE_VALUE_REPLACED", BAD_ATTRIBUTE_VALUE_REPLACED },
{ "INVALID_XML_ID", INVALID_XML_ID },
{ "UNEXPECTED_END_OF_FILE_ATTR", UNEXPECTED_END_OF_FILE_ATTR },
{ "MISSING_ATTRIBUTE", MISSING_ATTRIBUTE },
{ "WHITE_IN_URI", WHITE_IN_URI },
{ "REMOVED_HTML5", REMOVED_HTML5 },
{ "BAD_BODY_HTML5", BAD_BODY_HTML5 },
{ "BAD_ALIGN_HTML5", BAD_ALIGN_HTML5 },
{ "BAD_SUMMARY_HTML5", BAD_SUMMARY_HTML5 },
{ "PREVIOUS_LOCATION", PREVIOUS_LOCATION },
{ "VENDOR_SPECIFIC_CHARS", VENDOR_SPECIFIC_CHARS },
{ "INVALID_SGML_CHARS", INVALID_SGML_CHARS },
{ "INVALID_UTF8", INVALID_UTF8 },
{ "INVALID_UTF16", INVALID_UTF16 },
{ "ENCODING_MISMATCH", ENCODING_MISMATCH },
{ "INVALID_URI", INVALID_URI },
{ "INVALID_NCR", INVALID_NCR },
{ "CODES_TIDY_ERROR_LAST", CODES_TIDY_ERROR_LAST },
#if SUPPORT_ACCESSIBILITY_CHECKS
{ "FIRST_ACCESS_ERR", FIRST_ACCESS_ERR },
{ "IMG_MISSING_ALT", IMG_MISSING_ALT },
{ "IMG_ALT_SUSPICIOUS_FILENAME", IMG_ALT_SUSPICIOUS_FILENAME },
{ "IMG_ALT_SUSPICIOUS_FILE_SIZE", IMG_ALT_SUSPICIOUS_FILE_SIZE },
{ "IMG_ALT_SUSPICIOUS_PLACEHOLDER", IMG_ALT_SUSPICIOUS_PLACEHOLDER },
{ "IMG_ALT_SUSPICIOUS_TOO_LONG", IMG_ALT_SUSPICIOUS_TOO_LONG },
{ "IMG_MISSING_ALT_BULLET", IMG_MISSING_ALT_BULLET },
{ "IMG_MISSING_ALT_H_RULE", IMG_MISSING_ALT_H_RULE },
{ "IMG_MISSING_LONGDESC_DLINK", IMG_MISSING_LONGDESC_DLINK },
{ "IMG_MISSING_DLINK", IMG_MISSING_DLINK },
{ "IMG_MISSING_LONGDESC", IMG_MISSING_LONGDESC },
{ "LONGDESC_NOT_REQUIRED", LONGDESC_NOT_REQUIRED },
{ "IMG_BUTTON_MISSING_ALT", IMG_BUTTON_MISSING_ALT },
{ "APPLET_MISSING_ALT", APPLET_MISSING_ALT },
{ "OBJECT_MISSING_ALT", OBJECT_MISSING_ALT },
{ "AUDIO_MISSING_TEXT_WAV", AUDIO_MISSING_TEXT_WAV },
{ "AUDIO_MISSING_TEXT_AU", AUDIO_MISSING_TEXT_AU },
{ "AUDIO_MISSING_TEXT_AIFF", AUDIO_MISSING_TEXT_AIFF },
{ "AUDIO_MISSING_TEXT_SND", AUDIO_MISSING_TEXT_SND },
{ "AUDIO_MISSING_TEXT_RA", AUDIO_MISSING_TEXT_RA },
{ "AUDIO_MISSING_TEXT_RM", AUDIO_MISSING_TEXT_RM },
{ "FRAME_MISSING_LONGDESC", FRAME_MISSING_LONGDESC },
{ "AREA_MISSING_ALT", AREA_MISSING_ALT },
{ "SCRIPT_MISSING_NOSCRIPT", SCRIPT_MISSING_NOSCRIPT },
{ "ASCII_REQUIRES_DESCRIPTION", ASCII_REQUIRES_DESCRIPTION },
{ "IMG_MAP_SERVER_REQUIRES_TEXT_LINKS", IMG_MAP_SERVER_REQUIRES_TEXT_LINKS },
{ "MULTIMEDIA_REQUIRES_TEXT", MULTIMEDIA_REQUIRES_TEXT },
{ "IMG_MAP_CLIENT_MISSING_TEXT_LINKS", IMG_MAP_CLIENT_MISSING_TEXT_LINKS },
{ "INFORMATION_NOT_CONVEYED_IMAGE", INFORMATION_NOT_CONVEYED_IMAGE },
{ "INFORMATION_NOT_CONVEYED_APPLET", INFORMATION_NOT_CONVEYED_APPLET },
{ "INFORMATION_NOT_CONVEYED_OBJECT", INFORMATION_NOT_CONVEYED_OBJECT },
{ "INFORMATION_NOT_CONVEYED_SCRIPT", INFORMATION_NOT_CONVEYED_SCRIPT },
{ "INFORMATION_NOT_CONVEYED_INPUT", INFORMATION_NOT_CONVEYED_INPUT },
{ "COLOR_CONTRAST_TEXT", COLOR_CONTRAST_TEXT },
{ "COLOR_CONTRAST_LINK", COLOR_CONTRAST_LINK },
{ "COLOR_CONTRAST_ACTIVE_LINK", COLOR_CONTRAST_ACTIVE_LINK },
{ "COLOR_CONTRAST_VISITED_LINK", COLOR_CONTRAST_VISITED_LINK },
{ "DOCTYPE_MISSING", DOCTYPE_MISSING },
{ "STYLE_SHEET_CONTROL_PRESENTATION", STYLE_SHEET_CONTROL_PRESENTATION },
{ "HEADERS_IMPROPERLY_NESTED", HEADERS_IMPROPERLY_NESTED },
{ "POTENTIAL_HEADER_BOLD", POTENTIAL_HEADER_BOLD },
{ "POTENTIAL_HEADER_ITALICS", POTENTIAL_HEADER_ITALICS },
{ "POTENTIAL_HEADER_UNDERLINE", POTENTIAL_HEADER_UNDERLINE },
{ "HEADER_USED_FORMAT_TEXT", HEADER_USED_FORMAT_TEXT },
{ "LIST_USAGE_INVALID_UL", LIST_USAGE_INVALID_UL },
{ "LIST_USAGE_INVALID_OL", LIST_USAGE_INVALID_OL },
{ "LIST_USAGE_INVALID_LI", LIST_USAGE_INVALID_LI },
{ "INDICATE_CHANGES_IN_LANGUAGE", INDICATE_CHANGES_IN_LANGUAGE },
{ "LANGUAGE_NOT_IDENTIFIED", LANGUAGE_NOT_IDENTIFIED },
{ "LANGUAGE_INVALID", LANGUAGE_INVALID },
{ "DATA_TABLE_MISSING_HEADERS", DATA_TABLE_MISSING_HEADERS },
{ "DATA_TABLE_MISSING_HEADERS_COLUMN", DATA_TABLE_MISSING_HEADERS_COLUMN },
{ "DATA_TABLE_MISSING_HEADERS_ROW", DATA_TABLE_MISSING_HEADERS_ROW },
{ "DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS", DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS },
{ "DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS", DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS },
{ "LAYOUT_TABLES_LINEARIZE_PROPERLY", LAYOUT_TABLES_LINEARIZE_PROPERLY },
{ "LAYOUT_TABLE_INVALID_MARKUP", LAYOUT_TABLE_INVALID_MARKUP },
{ "TABLE_MISSING_SUMMARY", TABLE_MISSING_SUMMARY },
{ "TABLE_SUMMARY_INVALID_NULL", TABLE_SUMMARY_INVALID_NULL },
{ "TABLE_SUMMARY_INVALID_SPACES", TABLE_SUMMARY_INVALID_SPACES },
{ "TABLE_SUMMARY_INVALID_PLACEHOLDER", TABLE_SUMMARY_INVALID_PLACEHOLDER },
{ "TABLE_MISSING_CAPTION", TABLE_MISSING_CAPTION },
{ "TABLE_MAY_REQUIRE_HEADER_ABBR", TABLE_MAY_REQUIRE_HEADER_ABBR },
{ "TABLE_MAY_REQUIRE_HEADER_ABBR_NULL", TABLE_MAY_REQUIRE_HEADER_ABBR_NULL },
{ "TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES", TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES },
{ "STYLESHEETS_REQUIRE_TESTING_LINK", STYLESHEETS_REQUIRE_TESTING_LINK },
{ "STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT", STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT },
{ "STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR", STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR },
{ "FRAME_SRC_INVALID", FRAME_SRC_INVALID },
{ "TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET", TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET },
{ "TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT", TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT },
{ "TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT", TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT },
{ "PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT", PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT },
{ "PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT", PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT },
{ "PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED", PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED },
{ "PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET", PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET },
{ "FRAME_MISSING_NOFRAMES", FRAME_MISSING_NOFRAMES },
{ "NOFRAMES_INVALID_NO_VALUE", NOFRAMES_INVALID_NO_VALUE },
{ "NOFRAMES_INVALID_CONTENT", NOFRAMES_INVALID_CONTENT },
{ "NOFRAMES_INVALID_LINK", NOFRAMES_INVALID_LINK },
{ "REMOVE_FLICKER_SCRIPT", REMOVE_FLICKER_SCRIPT },
{ "REMOVE_FLICKER_OBJECT", REMOVE_FLICKER_OBJECT },
{ "REMOVE_FLICKER_EMBED", REMOVE_FLICKER_EMBED },
{ "REMOVE_FLICKER_APPLET", REMOVE_FLICKER_APPLET },
{ "REMOVE_FLICKER_ANIMATED_GIF", REMOVE_FLICKER_ANIMATED_GIF },
{ "REMOVE_BLINK_MARQUEE", REMOVE_BLINK_MARQUEE },
{ "REMOVE_AUTO_REFRESH", REMOVE_AUTO_REFRESH },
{ "REMOVE_AUTO_REDIRECT", REMOVE_AUTO_REDIRECT },
{ "ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT", ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT },
{ "ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT", ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT },
{ "ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET", ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET },
{ "ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED", ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED },
{ "IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION", IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION },
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN },
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP },
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK },
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER },
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT },
{ "SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE", SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE },
{ "NEW_WINDOWS_REQUIRE_WARNING_NEW", NEW_WINDOWS_REQUIRE_WARNING_NEW },
{ "NEW_WINDOWS_REQUIRE_WARNING_BLANK", NEW_WINDOWS_REQUIRE_WARNING_BLANK },
{ "LABEL_NEEDS_REPOSITIONING_BEFORE_INPUT", LABEL_NEEDS_REPOSITIONING_BEFORE_INPUT },
{ "LABEL_NEEDS_REPOSITIONING_AFTER_INPUT", LABEL_NEEDS_REPOSITIONING_AFTER_INPUT },
{ "FORM_CONTROL_REQUIRES_DEFAULT_TEXT", FORM_CONTROL_REQUIRES_DEFAULT_TEXT },
{ "FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL", FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL },
{ "FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES", FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES },
{ "REPLACE_DEPRECATED_HTML_APPLET", REPLACE_DEPRECATED_HTML_APPLET },
{ "REPLACE_DEPRECATED_HTML_BASEFONT", REPLACE_DEPRECATED_HTML_BASEFONT },
{ "REPLACE_DEPRECATED_HTML_CENTER", REPLACE_DEPRECATED_HTML_CENTER },
{ "REPLACE_DEPRECATED_HTML_DIR", REPLACE_DEPRECATED_HTML_DIR },
{ "REPLACE_DEPRECATED_HTML_FONT", REPLACE_DEPRECATED_HTML_FONT },
{ "REPLACE_DEPRECATED_HTML_ISINDEX", REPLACE_DEPRECATED_HTML_ISINDEX },
{ "REPLACE_DEPRECATED_HTML_MENU", REPLACE_DEPRECATED_HTML_MENU },
{ "REPLACE_DEPRECATED_HTML_S", REPLACE_DEPRECATED_HTML_S },
{ "REPLACE_DEPRECATED_HTML_STRIKE", REPLACE_DEPRECATED_HTML_STRIKE },
{ "REPLACE_DEPRECATED_HTML_U", REPLACE_DEPRECATED_HTML_U },
{ "FRAME_MISSING_TITLE", FRAME_MISSING_TITLE },
{ "FRAME_TITLE_INVALID_NULL", FRAME_TITLE_INVALID_NULL },
{ "FRAME_TITLE_INVALID_SPACES", FRAME_TITLE_INVALID_SPACES },
{ "ASSOCIATE_LABELS_EXPLICITLY", ASSOCIATE_LABELS_EXPLICITLY },
{ "ASSOCIATE_LABELS_EXPLICITLY_FOR", ASSOCIATE_LABELS_EXPLICITLY_FOR },
{ "ASSOCIATE_LABELS_EXPLICITLY_ID", ASSOCIATE_LABELS_EXPLICITLY_ID },
{ "LINK_TEXT_NOT_MEANINGFUL", LINK_TEXT_NOT_MEANINGFUL },
{ "LINK_TEXT_MISSING", LINK_TEXT_MISSING },
{ "LINK_TEXT_TOO_LONG", LINK_TEXT_TOO_LONG },
{ "LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE", LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE },
{ "LINK_TEXT_NOT_MEANINGFUL_MORE", LINK_TEXT_NOT_MEANINGFUL_MORE },
{ "LINK_TEXT_NOT_MEANINGFUL_FOLLOW_THIS", LINK_TEXT_NOT_MEANINGFUL_FOLLOW_THIS },
{ "METADATA_MISSING", METADATA_MISSING },
{ "METADATA_MISSING_LINK", METADATA_MISSING_LINK },
{ "METADATA_MISSING_REDIRECT_AUTOREFRESH", METADATA_MISSING_REDIRECT_AUTOREFRESH },
{ "SKIPOVER_ASCII_ART", SKIPOVER_ASCII_ART },
{ "LAST_ACCESS_ERR", LAST_ACCESS_ERR },
#endif
{ NULL, 0 },
};
/**
 *  Looks up the string key belonging to an error code.
 *  The key table is scanned from its first entry up to the NULL-key
 *  sentinel; the key of the first entry whose value equals `code` is
 *  returned. When no entry matches, the literal "UNDEFINED" is returned.
 */
ctmbstr tidyErrorCodeAsString(uint code)
{
    const tidyErrorFilterKeyItem *entry;

    for ( entry = tidyErrorFilterKeysStruct; entry->key; ++entry )
    {
        if ( entry->value == code )
            return entry->key;
    }

    return "UNDEFINED";
}
/**
 *  The real string lookup function.
 *  Asks `definition` which plural form applies to `plural`, then walks the
 *  definition's dictionary until the first entry with a NULL value and
 *  returns the string of the first entry matching both `messageType` and
 *  that plural form. Returns NULL when there is no such entry.
 */
ctmbstr TY_(tidyLocalizedString)( uint messageType, languageDefinition *definition, uint plural )
{
    const languageDictionaryEntry *entry = definition->messages;
    uint wantedForm = definition->whichPluralForm( plural );

    while ( entry->value )
    {
        if ( entry->key == messageType && entry->pluralForm == wantedForm )
            return entry->value;
        ++entry;
    }

    return NULL;
}
/**
 *  Provides the string for `messageType` in the current localization,
 *  in the plural form appropriate for `quantity`.
 *
 *  The search order is: the current language, the fallback language
 *  (when one is set), the built-in `en` language with `quantity`, and
 *  finally the built-in `en` language with a quantity of 1. The first
 *  string found is returned; the result is NULL only if every step fails.
 *
 *  This isn't currently highly optimized; rewriting some of the
 *  infrastructure to use hash lookups is a preferred future optimization.
 */
ctmbstr tidyLocalizedStringN( uint messageType, uint quantity )
{
    ctmbstr found;

    found = TY_(tidyLocalizedString)( messageType, tidyLanguages.currentLanguage, quantity );
    if ( found )
        return found;

    if ( tidyLanguages.fallbackLanguage )
    {
        found = TY_(tidyLocalizedString)( messageType, tidyLanguages.fallbackLanguage, quantity );
        if ( found )
            return found;
    }

    /* Fall back to en, which is built in. */
    found = TY_(tidyLocalizedString)( messageType, &language_en, quantity );
    if ( found )
        return found;

    /* Last resort: the built-in en string for a quantity of one. */
    return TY_(tidyLocalizedString)( messageType, &language_en, 1 );
}
/**
 *  Provides the string for `messageType` in the current localization,
 *  in the non-plural form. This is tidyLocalizedStringN() called with a
 *  quantity of 1, so it shares that function's fallback behaviour.
 */
ctmbstr tidyLocalizedString( uint messageType )
{
    const uint singular = 1;

    return tidyLocalizedStringN( messageType, singular );
}
/**
 **  Determines the name of the locale selected by the environment
 **  without leaving the process locale changed: the locale in effect on
 **  entry is restored before returning. (If that locale cannot be
 **  saved, the "C" locale is selected instead, which is what Tidy has
 **  always run under.)
 **  @param result Ignored on input; it is overwritten with a newly
 **         allocated buffer. Kept for interface compatibility.
 **  @return A malloc'd copy of the locale name that the caller must
 **          free, or NULL if the name could not be determined or
 **          memory could not be allocated.
 */
tmbstr tidySystemLocale(tmbstr result)
{
    ctmbstr temp;
    tmbstr previous = NULL;

    /* Remember the caller's locale. A copy is needed because the string
       returned by setlocale() may be overwritten by later calls. */
    temp = setlocale( LC_ALL, NULL );
    if ( temp && ( previous = malloc( strlen( temp ) + 1 ) ) )
        strcpy( previous, temp );

    /* This should set the OS locale. */
    setlocale( LC_ALL, "" );

    /* This should read the current locale. */
    temp = setlocale( LC_ALL, NULL );

    /* Make a new copy of the string, because temp always points to the
       current locale. setlocale() may return NULL, so guard strlen(). */
    result = NULL;
    if ( temp && ( result = malloc( strlen( temp ) + 1 ) ) )
        strcpy( result, temp );

    /* Put back whatever locale was active on entry. */
    setlocale( LC_ALL, previous ? previous : "C" );
    free( previous );

    return result;
}
/**
 *  Retrieves the POSIX name for a string. The request is lower-cased and
 *  looked up in `localeMappings`; if it is not a known Windows name it is
 *  used as given. The outcome is then reduced to either `ll` (at most two
 *  characters) or `ll_cc` (characters 1-2, an underscore, characters 4-5).
 *
 *  We deliberately trust the user here: the worst that can happen is a
 *  junk language code that doesn't exist and won't be set.
 *
 *  @param locale The Windows or POSIX locale name to normalize.
 *  @return A pointer to a static buffer, so please don't try to free it;
 *          each call overwrites the previous result. Returns NULL if
 *          `locale` is NULL or memory could not be allocated.
 */
tmbstr tidyNormalizedLocaleName( ctmbstr locale )
{
    static char result[6] = "xx_yy";
    tmbstr search;
    ctmbstr name;
    uint i;
    uint len;

    if ( !locale || !( search = strdup( locale ) ) )
        return NULL;
    search = TY_(tmbstrtolower)(search);

    /* See if our string matches a Windows name; the first match wins. */
    name = search;
    for ( i = 0; localeMappings[i].winName; ++i )
    {
        if ( strcmp( localeMappings[i].winName, search ) == 0 )
        {
            name = localeMappings[i].POSIXName;
            break;
        }
    }

    len = strlen( name );

    if ( len >= 5 )
    {
        /* Language + region: force the separator to be an underscore. */
        result[0] = (char)tolower( (unsigned char)name[0] );
        result[1] = (char)tolower( (unsigned char)name[1] );
        result[2] = '_';
        result[3] = (char)tolower( (unsigned char)name[3] );
        result[4] = (char)tolower( (unsigned char)name[4] );
        result[5] = '\0';
    }
    else
    {
        /* Language only. Always terminate the buffer so that nothing
           from the initializer or an earlier call shows through. */
        uint keep = len < 2 ? len : 2;

        for ( i = 0; i < keep; i++ )
            result[i] = (char)tolower( (unsigned char)name[i] );
        result[keep] = '\0';
    }

    free( search );
    return result;
}
/**
 *  Searches the installed languages for one whose language code (the
 *  value of the first entry of its dictionary) equals `languageCode`.
 *  Returns that languageDefinition, or NULL when no installed language
 *  matches.
 */
languageDefinition *TY_(tidyTestLanguage)( ctmbstr languageCode )
{
    languageDefinition **candidate;

    for ( candidate = tidyLanguages.languages; *candidate; ++candidate )
    {
        ctmbstr installedCode = (*candidate)->messages[0].value;

        if ( strcmp( installedCode, languageCode ) == 0 )
            return *candidate;
    }

    return NULL;
}
/**
 *  Tells Tidy to use a different language for output.
 *  @param languageCode A Windows or POSIX language code, and must match
 *         a TIDY_LANGUAGE for an installed language.
 *  @result Indicates that a setting was applied, but not necessarily the
 *         specific request, i.e., true indicates a language and/or region
 *         was applied. If es_mx is requested but not installed, and es is
 *         installed, then es will be selected and this function will return
 *         true. However the opposite is not true; if es is requested but
 *         not present, Tidy will not try to select from the es_XX variants.
 *         When nothing matches, the current settings are left unchanged.
 */
Bool tidySetLanguage( ctmbstr languageCode )
{
    languageDefinition *exactMatch = NULL;
    languageDefinition *baseMatch = NULL;
    tmbstr normalized = NULL;
    char baseCode[3] = "";

    if ( !languageCode )
        return no;

    normalized = tidyNormalizedLocaleName( languageCode );
    if ( !normalized )
        return no;

    /* The normalized code is either a bare language or language_region.
       Look for the requested code itself and, when it is longer than two
       characters, for its two-letter language as a backup. */
    exactMatch = TY_(tidyTestLanguage( normalized ));

    if ( strlen( normalized ) > 2 )
    {
        baseCode[0] = normalized[0];
        baseCode[1] = normalized[1];
        baseCode[2] = '\0';
        baseMatch = TY_(tidyTestLanguage( baseCode ) );
    }

    if ( exactMatch )
    {
        /* The backup language (possibly NULL) becomes the fallback. */
        tidyLanguages.currentLanguage = exactMatch;
        tidyLanguages.fallbackLanguage = baseMatch;
    }
    else if ( baseMatch )
    {
        tidyLanguages.currentLanguage = baseMatch;
        tidyLanguages.fallbackLanguage = NULL;
    }

    return exactMatch || baseMatch;
}
/**
 *  Gets the code of the language currently used by Tidy, i.e. the value
 *  of the first dictionary entry of the current language.
 */
ctmbstr tidyGetLanguage()
{
    return tidyLanguages.currentLanguage->messages[0].value;
}
/**
 *  Provides the string for `messageType` from the default localization
 *  (which is `en`), looked up for a quantity of one. No fallback is
 *  attempted; the result is NULL if `en` has no such string.
 */
ctmbstr tidyDefaultString( uint messageType )
{
    languageDefinition *builtIn = &language_en;

    return TY_(tidyLocalizedString)( messageType, builtIn, 1 );
}
/**
* Determines the true size of the `language_en` array indicating the
* number of items in the array, _not_ the highest index.
*/
const uint TY_(tidyStringKeyListSize)()
{
static uint array_size = 0;
if ( array_size == 0 )
{
while ( language_en.messages[array_size].value != NULL ) {
array_size++;
}
}
return array_size;
}
/*
* Initializes the TidyIterator to point to the first item
* in Tidy's list of localization string keys. Note that
* these are provided for documentation generation purposes
* and probably aren't useful for LibTidy implementors.
*/
TidyIterator getStringKeyList()
{
return (TidyIterator)(size_t)1;
}
/*
 *  Returns the key at the iterator's position in Tidy's list of
 *  localized strings and advances the iterator. Once the position moves
 *  past the last key the iterator is reset to 0; calling with an
 *  exhausted (0) or out-of-range iterator returns 0. Note that this is
 *  provided for documentation generation purposes and probably isn't
 *  useful to libtidy implementors.
 */
uint getNextStringKey( TidyIterator* iter )
{
    uint key = 0;
    size_t position;
    size_t count;

    assert( iter != NULL );

    position = (size_t)*iter;
    count = TY_(tidyStringKeyListSize)();

    if ( position >= 1 && position <= count )
    {
        key = language_en.messages[ position - 1 ].key;
        ++position;
    }

    /* Anything beyond the end marks the iteration as finished. */
    if ( position > count )
        position = 0;

    *iter = (TidyIterator)position;
    return key;
}
/**
* Determines the true size of the `localeMappings` array indicating the
* number of items in the array, _not_ the highest index.
*/
const uint TY_(tidyLanguageListSize)()
{
static uint array_size = 0;
if ( array_size == 0 )
{
while ( localeMappings[array_size].winName ) {
array_size++;
}
}
return array_size;
}
/**
* Initializes the TidyIterator to point to the first item
* in Tidy's structure of Windows<->POSIX local mapping.
* Items can be retrieved with getNextWindowsLanguage();
*/
TidyIterator getWindowsLanguageList()
{
return (TidyIterator)(size_t)1;
}
/**
 *  Returns the record of type `tidyLocaleMapItem` at the iterator's
 *  position in Tidy's Windows<->POSIX locale mapping and advances the
 *  iterator. Once the position moves past the last record the iterator
 *  is reset to 0; calling with an exhausted (0) or out-of-range iterator
 *  returns NULL.
 */
const tidyLocaleMapItem *getNextWindowsLanguage( TidyIterator *iter )
{
    const tidyLocaleMapItem *mapping = NULL;
    size_t position;
    size_t count;

    assert( iter != NULL );

    position = (size_t)*iter;
    count = TY_(tidyLanguageListSize)();

    if ( position >= 1 && position <= count )
    {
        mapping = &localeMappings[ position - 1 ];
        ++position;
    }

    /* Anything beyond the end marks the iteration as finished. */
    if ( position > count )
        position = 0;

    *iter = (TidyIterator)position;
    return mapping;
}
/**
* Determines the number of languages installed in Tidy.
*/
const uint TY_(tidyInstalledLanguageListSize)()
{
static uint array_size = 0;
if ( array_size == 0 )
{
while ( tidyLanguages.languages[array_size] ) {
array_size++;
}
}
return array_size;
}
/**
* Initializes the TidyIterator to point to the first item
* in Tidy's list of installed language codes.
* Items can be retrieved with getNextInstalledLanguage();
*/
TidyIterator getInstalledLanguageList()
{
return (TidyIterator)(size_t)1;
}
/**
 *  Returns the language code of the installed language at the iterator's
 *  position and advances the iterator. Once the position moves past the
 *  last language the iterator is reset to 0; calling with an exhausted
 *  (0) or out-of-range iterator returns NULL.
 */
ctmbstr getNextInstalledLanguage( TidyIterator* iter )
{
    ctmbstr code = NULL;
    size_t position;
    size_t count;

    assert( iter != NULL );

    position = (size_t)*iter;
    count = TY_(tidyInstalledLanguageListSize)();

    if ( position >= 1 && position <= count )
    {
        code = tidyLanguages.languages[ position - 1 ]->messages[0].value;
        ++position;
    }

    /* Anything beyond the end marks the iteration as finished. */
    if ( position > count )
        position = 0;

    *iter = (TidyIterator)position;
    return code;
}

301
src/language.h Normal file
View file

@ -0,0 +1,301 @@
#ifndef language_h
#define language_h
/*
* language.h
* Localization support for HTML Tidy.
* This header provides the public (within libtidy) interface
* to basic localization support. To add your own localization
* create a new `language_xx.h` file and add it to the struct
* in `language.c`.
*
* (c) 2015 HTACG
* See tidy.h and access.h for the copyright notice.
*
* Created by Jim Derry on 11/28/15.
*/
#include "tidyplatform.h"
/** @name Exposed Data Structures */
/** @{ */
/**
* Describes a record for a localization string.
* - key must correspond with one of Tidy's enums (see `tidyMessageTypes`
* below).
* - pluralForm corresponds to the gettext plural forms case (not
* singularity). Most entries should be case 0, representing the
* single case. See:
* https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
* - value is the localized string for this key and plural form; the
* language tables use a NULL value in their terminating entry.
*/
typedef struct languageDictionaryEntry {
uint key;
uint pluralForm;
ctmbstr value;
} languageDictionaryEntry;
/**
* For now we'll just use an array to hold all of the dictionary
* entries. In the future we can convert this to a hash structure
* which will make looking up strings faster.
*
* The array has a fixed capacity of 600 entries, so a language table
* (including its terminating entry) cannot hold more than that.
*/
typedef languageDictionaryEntry const languageDictionary[600];
/**
* Finally, a complete language definition. The member `whichPluralForm`
* is a function pointer that will provide the correct plural
* form given the value `n`. The actual function is present in
* each language header and is language dependent. The member
* `messages` holds the dictionary entries for the language.
*/
typedef struct languageDefinition {
uint (*whichPluralForm)(uint n);
languageDictionary messages;
} languageDefinition;
/**
* The function getNextWindowsLanguage() returns pointers to this type;
* it gives LibTidy implementors the ability to determine how Windows
* locale names are mapped to POSIX language codes.
*/
typedef struct tidyLocaleMapItem {
/* The Windows locale name. */
ctmbstr winName;
/* The POSIX language code that the Windows name is mapped to. */
ctmbstr POSIXName;
} tidyLocaleMapItem;
/**
* Defines all of the possible dictionary keys.
* The starting value is arbitrary but must prevent overlaps
* with other enums that are used for retrieving strings. The
* comprehensive list of enums for which we provide strings
* is as follows:
* - `tidyMessageTypes` in this file, start == 4096.
* - `tidyErrorCodes` from `message.h`, start == 200.
* - `accessErrorCodes` from `access.h`, start == CODES_TIDY_ERROR_LAST+1.
* - `tidyMessagesMisc` from `message.h`, start == 2048.
* - `TidyOptionId` from `tidyEnum.h`, start == 0 (important!).
* - `TidyReportLevelKeys` from `tidyEnum.h`, start == 600.
* - ...
* You should never count on the value of a label being
* constant. Accordingly feel free to arrange new enum
* values in the most appropriate grouping below.
*
* `TIDY_MESSAGE_TYPE_LAST` also serves as the key of the terminating
* entry in each language table (see the `language_xx.h` headers).
*/
typedef enum
{
/* This MUST be present and first. */
TIDY_MESSAGE_TYPE_FIRST = 4096,
/* Specify the code for this language. */
TIDY_LANGUAGE,
/* Localization test strings. */
TEST_PRESENT_IN_BASE,
TEST_PRESENT_IN_REGION,
/* Strings for the console application. */
TC_CAT_DIAGNOSTICS,
TC_CAT_ENCODING,
TC_CAT_MARKUP,
TC_CAT_MISC,
TC_CAT_PRETTYPRINT,
TC_LABEL_COL,
TC_LABEL_FILE,
TC_LABEL_LANG,
TC_LABEL_LEVL,
TC_LABEL_OPT,
TC_MAIN_ERROR_LOAD_CONFIG,
TC_OPT_ACCESS,
TC_OPT_ASCII,
TC_OPT_ASHTML,
TC_OPT_ASXML,
TC_OPT_BARE,
TC_OPT_BIG5,
TC_OPT_CLEAN,
TC_OPT_CONFIG,
TC_OPT_ERRORS,
TC_OPT_FILE,
TC_OPT_GDOC,
TC_OPT_HELP,
TC_OPT_HELPCFG,
TC_OPT_HELPOPT,
TC_OPT_IBM858,
TC_OPT_INDENT,
TC_OPT_ISO2022,
TC_OPT_LANGUAGE,
TC_OPT_LATIN0,
TC_OPT_LATIN1,
TC_OPT_MAC,
TC_OPT_MODIFY,
TC_OPT_NUMERIC,
TC_OPT_OMIT,
TC_OPT_OUTPUT,
TC_OPT_QUIET,
TC_OPT_RAW,
TC_OPT_SHIFTJIS,
TC_OPT_SHOWCFG,
TC_OPT_UPPER,
TC_OPT_UTF16,
TC_OPT_UTF16BE,
TC_OPT_UTF16LE,
TC_OPT_UTF8,
TC_OPT_VERSION,
TC_OPT_WIN1252,
TC_OPT_WRAP,
TC_OPT_XML,
TC_OPT_XMLCFG,
TC_OPT_XMLSTRG,
TC_OPT_XMLOPTS,
TC_OPT_XMLHELP,
TC_STRING_CONF_HEADER,
TC_STRING_CONF_NAME,
TC_STRING_CONF_TYPE,
TC_STRING_CONF_VALUE,
TC_STRING_CONF_NOTE,
TC_STRING_OPT_NOT_DOCUMENTED,
TC_STRING_OUT_OF_MEMORY,
TC_STRING_FATAL_ERROR,
TC_STRING_FILE_MANIP,
TC_STRING_LANG_MUST_SPECIFY,
TC_STRING_LANG_NOT_FOUND,
TC_STRING_MUST_SPECIFY,
TC_STRING_PROCESS_DIRECTIVES,
TC_STRING_CHAR_ENCODING,
TC_STRING_MISC,
TC_STRING_XML,
TC_STRING_UNKNOWN_OPTION,
TC_STRING_UNKNOWN_OPTION_B,
TC_STRING_VERS_A,
TC_STRING_VERS_B,
TC_TXT_HELP_1,
TC_TXT_HELP_2A,
TC_TXT_HELP_2B,
TC_TXT_HELP_3,
TC_TXT_HELP_CONFIG,
TC_TXT_HELP_CONFIG_NAME,
TC_TXT_HELP_CONFIG_TYPE,
TC_TXT_HELP_CONFIG_ALLW,
TC_TXT_HELP_LANG_1,
TC_TXT_HELP_LANG_2,
TC_TXT_HELP_LANG_3,
/* This MUST be present and last. */
TIDY_MESSAGE_TYPE_LAST
} tidyMessageTypes;
/**
* LibTidy users may want to use `TidyReportFilter3` to enable their own
* localization lookup features. Because Tidy's error codes are enums the
* specific values can change over time. This function returns a string
* representing the enum value name that can be used as a lookup key
* independent of changing string values (`TidyReportFilter2` is vulnerable
* to changing strings). `TidyReportFilter3` will return this general
* string as the error message indicator.
*/
ctmbstr tidyErrorCodeAsString(uint code);
/** @} */
/** @name Localization Related Functions */
/** @{ */
/**
** Determines the current locale without affecting the C locale.
** Tidy has always used the default C locale, and at this point
** in its development we're not going to tamper with that.
** @param result The buffer to use to return the string.
** Returns NULL on failure.
** @return The same buffer for convenience.
*/
tmbstr tidySystemLocale(tmbstr result);
/**
* Tells Tidy to use a different language for output.
* @param languageCode A Windows or POSIX language code, and must match
* a TIDY_LANGUAGE for an installed language.
* @result Indicates that a setting was applied, but not necessarily the
* specific request, i.e., true indicates a language and/or region
* was applied. If es_mx is requested but not installed, and es is
* installed, then es will be selected and this function will return
* true. However the opposite is not true; if es is requested but
* not present, Tidy will not try to select from the es_XX variants.
*/
Bool tidySetLanguage( ctmbstr languageCode );
/**
* Gets the current language used by Tidy.
*/
ctmbstr tidyGetLanguage();
/**
* Provides a string given `messageType` in the current
* localization for `quantity`.
*/
ctmbstr tidyLocalizedStringN( uint messageType, uint quantity );
/**
* Provides a string given `messageType` in the current
* localization for the single case.
*/
ctmbstr tidyLocalizedString( uint messageType );
/** @} */
/** @name Documentation Generation */
/** @{ */
/**
* Provides a string given `messageType` in the default
* localization (which is `en`).
*/
ctmbstr tidyDefaultString( uint messageType );
/**
* Initializes the TidyIterator to point to the first item
* in Tidy's list of localization string keys. Note that
* these are provided for documentation generation purposes
* and probably aren't useful for LibTidy implementors.
*/
TidyIterator getStringKeyList();
/**
* Provides the next key value in Tidy's list of localized
* strings. Note that these are provided for documentation
* generation purposes and probably aren't useful to
* LibTidy implementors.
*/
uint getNextStringKey( TidyIterator* iter );
/**
* Initializes the TidyIterator to point to the first item
* in Tidy's structure of Windows<->POSIX locale mapping.
* Items can be retrieved with getNextWindowsLanguage();
*/
TidyIterator getWindowsLanguageList();
/**
* Returns the next record of type `tidyLocaleMapItem` in
* Tidy's structure of Windows<->POSIX locale mapping.
*/
const tidyLocaleMapItem *getNextWindowsLanguage( TidyIterator* iter );
/**
* Initializes the TidyIterator to point to the first item
* in Tidy's list of installed language codes.
* Items can be retrieved with getNextInstalledLanguage();
*/
TidyIterator getInstalledLanguageList();
/**
* Returns the next installed language.
*/
ctmbstr getNextInstalledLanguage( TidyIterator* iter );
/** @} */
#endif /* language_h */

1842
src/language_en.h Normal file

File diff suppressed because it is too large Load diff

146
src/language_en_gb.h Normal file
View file

@ -0,0 +1,146 @@
#ifndef language_en_gb_h
#define language_en_gb_h
/*
* language_en_gb.h
* Localization support for HTML Tidy.
*
*
* This file is a localization file for HTML Tidy. It will have been machine
* generated or created and/or edited by hand. Both are valid options, but
* please help keep our localization efforts simple to maintain by maintaining
* the structure of this file, and changing the check box below if you make
* changes (so others know the file origin):
*
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the
* language (and maybe region) "en_gb". The source of
* these strings is a gettext PO file in Tidy's source, probably called
* "language_en_gb.po".
*
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel free to edit this file
* directly (and check this box). If you prefer to edit PO files then use
* `poconvert.rb msgunfmt language_en_gb.h` (our own
* conversion tool) to generate a fresh PO from this file first!
*
* (c) 2015 HTACG
* See tidy.h and access.h for the copyright notice.
*
* Template Created by Jim Derry on 01/14/2016.
*
* Originating PO file metadata:
* PO_LAST_TRANSLATOR=jderry
* PO_REVISION_DATE=2016-01-29 10:54:42
*/
#ifdef _MSC_VER
#pragma execution_character_set("utf-8")
#endif
#include "language.h"
#include "access.h"
#include "message.h"
/**
 *  Plural-form selector for the "en_gb" localization; it is stored
 *  as the `whichPluralForm` member of the language definition.
 *  Yields form 0 for exactly one item and form 1 for any other count.
 */
static uint whichPluralForm_en_gb(uint n) {
    /* Plural-Forms: nplurals=2; */
    if ( n == 1 )
    {
        return 0;
    }
    return 1;
}
/**
* Localization table for the language code "en_gb".
* The table starts with the TIDY_LANGUAGE entry and ends with the
* TIDY_MESSAGE_TYPE_LAST entry whose value is NULL. Only the strings
* listed below are provided by this table.
* NOTE(review): keys that are absent here are presumably served from a
* fallback language by the lookup code in language.c -- confirm there.
* Static definition in a header file makes it easy to include and
* exclude languages without tinkering with the build system.
*/
static languageDefinition language_en_gb = { whichPluralForm_en_gb, {
/***************************************
** This MUST be present and first.
** Specify the code for this language.
***************************************/
{/* Specify the ll or ll_cc language code here. */
TIDY_LANGUAGE, 0, "en_gb"
},
{/* This console output should be limited to 78 characters per line. */
TEXT_USING_FONT, 0,
"You are recommended to use CSS to specify the font and\n"
"properties such as its size and colour. This will reduce\n"
"the size of HTML files and make them easier to maintain\n"
"compared with using <FONT> elements.\n\n"
},
{/* This console output should be limited to 78 characters per line. */
TEXT_USING_BODY, 0, "You are recommended to use CSS to specify page and link colours\n"
},
{/* This console output should be limited to 78 characters per line.
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TEXT_GENERAL_INFO_PLEA, 0,
"\n"
"Would you like to see Tidy in proper, British English? Please consider \n"
"helping us to localise HTML Tidy. For details please see \n"
"https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md\n"
},
/* The accessibility strings are compiled in only when accessibility checks are enabled. */
#if SUPPORT_ACCESSIBILITY_CHECKS
{ INFORMATION_NOT_CONVEYED_IMAGE, 0, "[2.1.1.1]: ensure information not conveyed through colour alone (image)." },
{ INFORMATION_NOT_CONVEYED_APPLET, 0, "[2.1.1.2]: ensure information not conveyed through colour alone (applet)." },
{ INFORMATION_NOT_CONVEYED_OBJECT, 0, "[2.1.1.3]: ensure information not conveyed through colour alone (object)." },
{ INFORMATION_NOT_CONVEYED_SCRIPT, 0, "[2.1.1.4]: ensure information not conveyed through colour alone (script)." },
{ INFORMATION_NOT_CONVEYED_INPUT, 0, "[2.1.1.5]: ensure information not conveyed through colour alone (input)." },
{ COLOR_CONTRAST_TEXT, 0, "[2.2.1.1]: poor colour contrast (text)." },
{ COLOR_CONTRAST_LINK, 0, "[2.2.1.2]: poor colour contrast (link)." },
{ COLOR_CONTRAST_ACTIVE_LINK, 0, "[2.2.1.3]: poor colour contrast (active link)." },
{ COLOR_CONTRAST_VISITED_LINK, 0, "[2.2.1.4]: poor colour contrast (visited link)." },
#endif /* SUPPORT_ACCESSIBILITY_CHECKS */
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
It's very important that <br/> be self-closing in this manner!
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TidyMergeDivs, 0,
"This option can be used to modify the behaviour of <code>clean</code> when "
"set to <code>yes</code>."
"<br/>"
"This option specifies if Tidy should merge nested <code>&lt;div&gt;</code> "
"such as <code>&lt;div&gt;&lt;div&gt;...&lt;/div&gt;&lt;/div&gt;</code>. "
"<br/>"
"If set to <code>auto</code> the attributes of the inner "
"<code>&lt;div&gt;</code> are moved to the outer one. Nested "
"<code>&lt;div&gt;</code> with <code>id</code> attributes are <em>not</em> "
"merged. "
"<br/>"
"If set to <code>yes</code> the attributes of the inner "
"<code>&lt;div&gt;</code> are discarded with the exception of "
"<code>class</code> and <code>style</code>. "
},
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
It's very important that <br/> be self-closing in this manner!
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TidyMergeSpans, 0,
"This option can be used to modify the behaviour of <code>clean</code> when "
"set to <code>yes</code>."
"<br/>"
"This option specifies if Tidy should merge nested <code>&lt;span&gt;</code> "
"such as <code>&lt;span&gt;&lt;span&gt;...&lt;/span&gt;&lt;/span&gt;</code>. "
"<br/>"
"The algorithm is identical to the one used by <code>merge-divs</code>. "
},
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
It's very important that <br/> be self-closing in this manner!
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TidyReplaceColor, 0,
"This option specifies if Tidy should replace numeric values in colour "
"attributes with HTML/XHTML colour names where defined, e.g. replace "
"<code>#ffffff</code> with <code>white</code>. "
},
{/* This MUST be present and last. */
TIDY_MESSAGE_TYPE_LAST, 0, NULL
}
}};
#endif /* language_en_gb_h */

149
src/language_es.h Normal file
View file

@ -0,0 +1,149 @@
#ifndef language_es_h
#define language_es_h
/*
* language_es.h
* Localization support for HTML Tidy.
*
*
* This file is a localization file for HTML Tidy. It will have been machine
* generated or created and/or edited by hand. Both are valid options, but
* please help keep our localization efforts simple to maintain by maintaining
* the structure of this file, and changing the check box below if you make
* changes (so others know the file origin):
*
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the
* language (and maybe region) "es". The source of
* these strings is a gettext PO file in Tidy's source, probably called
* "language_es.po".
*
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel free to edit this file
* directly (and check this box). If you prefer to edit PO files then use
* `poconvert.rb msgunfmt language_es.h` (our own
* conversion tool) to generate a fresh PO from this file first!
*
* (c) 2015 HTACG
* See tidy.h and access.h for the copyright notice.
*
* Template Created by Jim Derry on 01/14/2016.
*
* Originating PO file metadata:
* PO_LAST_TRANSLATOR=jderry
* PO_REVISION_DATE=2016-01-29 10:54:42
*/
#ifdef _MSC_VER
#pragma execution_character_set("utf-8")
#endif
#include "language.h"
#include "access.h"
#include "message.h"
/**
 *  Plural-form selector for the "es" localization; it is stored
 *  as the `whichPluralForm` member of the language definition.
 *  Yields form 0 for exactly one item and form 1 for any other count.
 */
static uint whichPluralForm_es(uint n) {
    /* Plural-Forms: nplurals=2; */
    if ( n == 1 )
    {
        return 0;
    }
    return 1;
}
/**
* Localization table for the language code "es".
* The table starts with the TIDY_LANGUAGE entry and ends with the
* TIDY_MESSAGE_TYPE_LAST entry whose value is NULL. Only the strings
* listed below are provided by this table.
* NOTE(review): keys that are absent here are presumably served from a
* fallback language by the lookup code in language.c -- confirm there.
* Static definition in a header file makes it easy to include and
* exclude languages without tinkering with the build system.
*/
static languageDefinition language_es = { whichPluralForm_es, {
/***************************************
** This MUST be present and first.
** Specify the code for this language.
***************************************/
{/* Specify the ll or ll_cc language code here. */
TIDY_LANGUAGE, 0, "es"
},
{/* This console output should be limited to 78 characters per line.
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TEXT_GENERAL_INFO_PLEA, 0,
"\n"
"¿Le gustaría ver Tidy en un español correcto? Por favor considere \n"
"ayudarnos a localizar HTML Tidy. Para más detalles consulte \n"
"https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md \n"
},
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
It's very important that <br/> be self-closing in this manner!
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TidyMakeClean, 0,
"Esta opción especifica si Tidy debe realizar la limpieza de algún legado etiquetas de "
"presentación (actualmente <code>&lt;i&gt;</code>, <code>&lt;b&gt;</code>, <code>&lt;center&gt;</"
"code> cuando encerrados dentro de las etiquetas apropiadas en línea y <code>&lt;font&gt;</"
"code>). Si se establece en <code>yes</code>, entonces etiquetas existentes serán reemplazados "
"con CSS <code>&lt;style&gt;</code> y estructural markup según corresponda. "
},
/* The TidyNCR string is compiled in only when Asian encodings are supported. */
#if SUPPORT_ASIAN_ENCODINGS
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
It's very important that <br/> be self-closing in this manner!
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TidyNCR, 0, "Esta opción especifica si Tidy debe permitir referencias de caracteres numéricos. "
},
#endif /* SUPPORT_ASIAN_ENCODINGS */
{/* This console output should be limited to 78 characters per line.
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TC_TXT_HELP_LANG_1, 0,
"\n"
"La opción --language (o --lang) indica el lenguaje Tidy debe \n"
"utilizar para comunicar su salida. Tenga en cuenta que esto no es \n"
"un servicio de traducción de documentos, y sólo afecta a los mensajes \n"
"que Tidy comunica a usted. \n"
"\n"
"Cuando se utiliza la línea de comandos el argumento --language debe \n"
"utilizarse antes de cualquier argumento que dan lugar a la producción, \n"
"de lo contrario Tidy producirá la salida antes de que se conozca el \n"
"idioma a utilizar. \n"
"\n"
"Además de los códigos de idioma estándar POSIX, Tidy es capaz de \n"
"entender códigos de idioma legados de Windows. Tenga en cuenta que \n"
"este lista indica los códigos Tidy entiende, y no indica que \n"
"actualmente el idioma está instalado. \n"
"\n"
"La columna más a la derecha indica cómo Tidy comprenderá el \n"
"legado nombre de Windows.\n"
"\n"
},
{/* This console output should be limited to 78 characters per line.
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TC_TXT_HELP_LANG_2, 0,
"\n"
"Los siguientes idiomas están instalados actualmente en Tidy. Tenga \n"
"en cuenta que no hay garantía de que están completos; sólo quiere decir \n"
"que un desarrollador u otro comenzaron a añadir el idioma indicado. \n"
"\n"
"Localizaciones incompletas por defecto se usan \"en\" cuando sea \n"
"necesario. ¡Favor de informar los desarrolladores de estes casos! \n"
"\n"
},
{/* This console output should be limited to 78 characters per line.
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TC_TXT_HELP_LANG_3, 0,
"\n"
"Si Tidy es capaz de determinar la configuración regional entonces \n"
"Tidy utilizará el lenguaje de forma automática de la configuración \n"
"regional. Por ejemplo los sistemas de tipo Unix utilizan los variables \n"
"$LANG y/o $LC_ALL. Consulte a su documentación del sistema para \n"
"obtener más información.\n"
"\n"
},
{/* This MUST be present and last. */
TIDY_MESSAGE_TYPE_LAST, 0, NULL
}
}};
#endif /* language_es_h */

84
src/language_es_mx.h Normal file
View file

@ -0,0 +1,84 @@
#ifndef language_es_mx_h
#define language_es_mx_h
/*
* language_es_mx.h
* Localization support for HTML Tidy.
*
*
* This file is a localization file for HTML Tidy. It will have been machine
* generated or created and/or edited by hand. Both are valid options, but
* please help keep our localization efforts simple to maintain by maintaining
* the structure of this file, and changing the check box below if you make
* changes (so others know the file origin):
*
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the
* language (and maybe region) "es_mx". The source of
* these strings is a gettext PO file in Tidy's source, probably called
* "language_es_mx.po".
*
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel free to edit this file
* directly (and check this box). If you prefer to edit PO files then use
* `poconvert.rb msgunfmt language_es_mx.h` (our own
* conversion tool) to generate a fresh PO from this file first!
*
* (c) 2015 HTACG
* See tidy.h and access.h for the copyright notice.
*
* Template Created by Jim Derry on 01/14/2016.
*
* Originating PO file metadata:
* PO_LAST_TRANSLATOR=jderry
* PO_REVISION_DATE=2016-01-29 10:54:42
*/
#ifdef _MSC_VER
#pragma execution_character_set("utf-8")
#endif
#include "language.h"
#include "access.h"
#include "message.h"
/**
 *  Plural-form selector for the "es_mx" localization; it is stored
 *  as the `whichPluralForm` member of the language definition.
 *  Yields form 0 for exactly one item and form 1 for any other count.
 */
static uint whichPluralForm_es_mx(uint n) {
    /* Plural-Forms: nplurals=2; */
    if ( n == 1 )
    {
        return 0;
    }
    return 1;
}
/**
* Localization table for the language code "es_mx".
* The table starts with the TIDY_LANGUAGE entry and ends with the
* TIDY_MESSAGE_TYPE_LAST entry whose value is NULL. Only the strings
* listed below are provided by this table.
* NOTE(review): keys that are absent here are presumably served from a
* fallback language by the lookup code in language.c -- confirm there.
* Static definition in a header file makes it easy to include and
* exclude languages without tinkering with the build system.
*/
static languageDefinition language_es_mx = { whichPluralForm_es_mx, {
/***************************************
** This MUST be present and first.
** Specify the code for this language.
***************************************/
{/* Specify the ll or ll_cc language code here. */
TIDY_LANGUAGE, 0, "es_mx"
},
{/* This console output should be limited to 78 characters per line.
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TEXT_GENERAL_INFO_PLEA, 0,
"\n"
"¿Le gustaría ver Tidy en adecuada, español mexicano? Por favor considere \n"
"ayudarnos a localizar HTML Tidy. Para más detalles consulte \n"
"https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md \n"
},
{/* This MUST be present and last. */
TIDY_MESSAGE_TYPE_LAST, 0, NULL
}
}};
#endif /* language_es_mx_h */

85
src/language_zh_cn.h Normal file
View file

@ -0,0 +1,85 @@
#ifndef language_zh_cn_h
#define language_zh_cn_h
/*
* language_zh_cn.h
* Localization support for HTML Tidy.
*
*
* This file is a localization file for HTML Tidy. It will have been machine
* generated or created and/or edited by hand. Both are valid options, but
* please help keep our localization efforts simple to maintain by maintaining
* the structure of this file, and changing the check box below if you make
* changes (so others know the file origin):
*
* [X] THIS FILE IS MACHINE GENERATED. It is a localization file for the
* language (and maybe region) "zh_cn". The source of
* these strings is a gettext PO file in Tidy's source, probably called
* "language_zh_cn.po".
*
* [ ] THIS FILE WAS HAND MODIFIED. Translators, please feel free to edit this file
* directly (and check this box). If you prefer to edit PO files then use
* `poconvert.rb msgunfmt language_zh_cn.h` (our own
* conversion tool) to generate a fresh PO from this file first!
*
* (c) 2015 HTACG
* See tidy.h and access.h for the copyright notice.
*
* Template Created by Jim Derry on 01/14/2016.
*
* Originating PO file metadata:
* PO_LAST_TRANSLATOR=jderry
* PO_REVISION_DATE=2016-01-29 10:54:42
*/
#ifdef _MSC_VER
#pragma execution_character_set("utf-8")
#endif
#include "language.h"
#include "access.h"
#include "message.h"
/**
 *  Plural-form selector for the "zh_cn" localization; it is stored
 *  as the `whichPluralForm` member of the language definition.
 *  This language declares a single plural form, so the result is
 *  always form 0 regardless of the quantity.
 */
static uint whichPluralForm_zh_cn(uint n) {
    /* Plural-Forms: nplurals=1; */
    const uint onlyForm = 0;

    (void)n; /* the quantity does not influence the chosen form */
    return onlyForm;
}
/**
* Localization table for the language code "zh_cn".
* The table starts with the TIDY_LANGUAGE entry and ends with the
* TIDY_MESSAGE_TYPE_LAST entry whose value is NULL. Only the strings
* listed below are provided by this table.
* NOTE(review): keys that are absent here are presumably served from a
* fallback language by the lookup code in language.c -- confirm there.
* Static definition in a header file makes it easy to include and
* exclude languages without tinkering with the build system.
*/
static languageDefinition language_zh_cn = { whichPluralForm_zh_cn, {
/***************************************
** This MUST be present and first.
** Specify the code for this language.
***************************************/
{/* Specify the ll or ll_cc language code here. */
TIDY_LANGUAGE, 0, "zh_cn"
},
/* These strings contain printf-style conversions (%s, %d). */
{ FILE_CANT_OPEN, 0, "无法打开”%s”\n" },
{ LINE_COLUMN_STRING, 0, "行 %d 列 %d - " },
{ STRING_CONTENT_LOOKS, 0, "文档内容看起来像 %s" },
{/* The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TC_STRING_VERS_A, 0, "HTML Tidy 用于 %s 版本 %s"
},
{/* The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TC_STRING_VERS_B, 0, "HTML Tidy 版本 %s"
},
{/* This MUST be present and last. */
TIDY_MESSAGE_TYPE_LAST, 0, NULL
}
}};
#endif /* language_zh_cn_h */

File diff suppressed because it is too large Load diff

1054
src/message.c Executable file

File diff suppressed because it is too large Load diff

View file

@ -10,28 +10,27 @@
#include "forward.h" #include "forward.h"
#include "tidy.h" /* For TidyReportLevel */ #include "tidy.h" /* For TidyReportLevel */
#include "language.h"
/* General message writing routines. /* General message writing routines.
** Each message is a single warning, error, etc. ** Each message is a single warning, error, etc.
** **
** This routine will keep track of counts and, ** These routines keep track of counts and,
** if the caller has set a filter, it will be ** if the caller has set a filter, it will be
** called. The new preferred way of handling ** called. The new preferred way of handling
** Tidy diagnostics output is either a) define ** Tidy diagnostics output is either a) define
** a new output sink or b) install a message ** a new output sink or b) install a message
** filter routine. ** filter routine.
** **
** Keeps track of ShowWarnings, ShowErrors, etc. ** Keep track of ShowWarnings, ShowErrors, etc.
*/ */
ctmbstr TY_(ReleaseDate)(void); ctmbstr TY_(ReleaseDate)(void);
/* void TY_(ShowVersion)( TidyDocImpl* doc ); */
void TY_(ReportUnknownOption)( TidyDocImpl* doc, ctmbstr option ); void TY_(ReportUnknownOption)( TidyDocImpl* doc, ctmbstr option );
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option ); void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option );
void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc ); void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc );
/* void TY_(HelloMessage)( TidyDocImpl* doc, ctmbstr date, ctmbstr filename ); */
void TY_(ReportMarkupVersion)( TidyDocImpl* doc ); void TY_(ReportMarkupVersion)( TidyDocImpl* doc );
void TY_(ReportNumWarnings)( TidyDocImpl* doc ); void TY_(ReportNumWarnings)( TidyDocImpl* doc );
@ -60,117 +59,188 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code);
void TY_(ReportError)(TidyDocImpl* doc, Node* element, Node* node, uint code); void TY_(ReportError)(TidyDocImpl* doc, Node* element, Node* node, uint code);
void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code); void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code);
/* error codes for entities/numeric character references */
#define MISSING_SEMICOLON 1 /**
#define MISSING_SEMICOLON_NCR 2 * These tidyErrorCodes are used throughout libtidy, and also
#define UNKNOWN_ENTITY 3 * have associated localized strings to describe them.
#define UNESCAPED_AMPERSAND 4 *
#define APOS_UNDEFINED 5 * IMPORTANT: to maintain compatability with TidyMessageFilter3, if you add
* or remove keys from this enum, ALSO add/remove the corresponding key
* in language.c:tidyErrorFilterKeysStruct[]!
*/
typedef enum {
/* This MUST be present and first. */
CODES_TIDY_ERROR_FIRST = 200,
/* error codes for element messages */ /* error codes for entities/numeric character references */
#define MISSING_ENDTAG_FOR 6 MISSING_SEMICOLON,
#define MISSING_ENDTAG_BEFORE 7 MISSING_SEMICOLON_NCR,
#define DISCARDING_UNEXPECTED 8 UNKNOWN_ENTITY,
#define NESTED_EMPHASIS 9 UNESCAPED_AMPERSAND,
#define NON_MATCHING_ENDTAG 10 APOS_UNDEFINED,
#define TAG_NOT_ALLOWED_IN 11
#define MISSING_STARTTAG 12
#define UNEXPECTED_ENDTAG 13
#define USING_BR_INPLACE_OF 14
#define INSERTING_TAG 15
#define SUSPECTED_MISSING_QUOTE 16
#define MISSING_TITLE_ELEMENT 17
#define DUPLICATE_FRAMESET 18
#define CANT_BE_NESTED 19
#define OBSOLETE_ELEMENT 20
#define PROPRIETARY_ELEMENT 21
#define UNKNOWN_ELEMENT 22
#define TRIM_EMPTY_ELEMENT 23
#define COERCE_TO_ENDTAG 24
#define ILLEGAL_NESTING 25
#define NOFRAMES_CONTENT 26
#define CONTENT_AFTER_BODY 27
#define INCONSISTENT_VERSION 28
#define MALFORMED_COMMENT 29
#define BAD_COMMENT_CHARS 30
#define BAD_XML_COMMENT 31
#define BAD_CDATA_CONTENT 32
#define INCONSISTENT_NAMESPACE 33
#define DOCTYPE_AFTER_TAGS 34
#define MALFORMED_DOCTYPE 35
#define UNEXPECTED_END_OF_FILE 36
#define DTYPE_NOT_UPPER_CASE 37
#define TOO_MANY_ELEMENTS 38
#define UNESCAPED_ELEMENT 39
#define NESTED_QUOTATION 40
#define ELEMENT_NOT_EMPTY 41
#define ENCODING_IO_CONFLICT 42
#define MIXED_CONTENT_IN_BLOCK 43
#define MISSING_DOCTYPE 44
#define SPACE_PRECEDING_XMLDECL 45
#define TOO_MANY_ELEMENTS_IN 46
#define UNEXPECTED_ENDTAG_IN 47
#define REPLACING_ELEMENT 83
#define REPLACING_UNEX_ELEMENT 84
#define COERCE_TO_ENDTAG_WARN 85
/* error codes used for attribute messages */ /* error codes for element messages */
#define UNKNOWN_ATTRIBUTE 48 MISSING_ENDTAG_FOR,
#define INSERTING_ATTRIBUTE 49 MISSING_ENDTAG_BEFORE,
#define INSERTING_AUTO_ATTRIBUTE 50 DISCARDING_UNEXPECTED,
#define MISSING_ATTR_VALUE 51 NESTED_EMPHASIS,
#define BAD_ATTRIBUTE_VALUE 52 NON_MATCHING_ENDTAG,
#define UNEXPECTED_GT 53 TAG_NOT_ALLOWED_IN,
#define PROPRIETARY_ATTRIBUTE 54 MISSING_STARTTAG,
#define PROPRIETARY_ATTR_VALUE 55 UNEXPECTED_ENDTAG,
#define REPEATED_ATTRIBUTE 56 USING_BR_INPLACE_OF,
#define MISSING_IMAGEMAP 57 INSERTING_TAG,
#define XML_ATTRIBUTE_VALUE 58 SUSPECTED_MISSING_QUOTE,
#define UNEXPECTED_QUOTEMARK 59 MISSING_TITLE_ELEMENT,
#define MISSING_QUOTEMARK 60 DUPLICATE_FRAMESET,
#define ID_NAME_MISMATCH 61 CANT_BE_NESTED,
OBSOLETE_ELEMENT,
PROPRIETARY_ELEMENT,
UNKNOWN_ELEMENT,
TRIM_EMPTY_ELEMENT,
COERCE_TO_ENDTAG,
ILLEGAL_NESTING,
NOFRAMES_CONTENT,
CONTENT_AFTER_BODY,
INCONSISTENT_VERSION,
MALFORMED_COMMENT,
BAD_COMMENT_CHARS,
BAD_XML_COMMENT,
BAD_CDATA_CONTENT,
INCONSISTENT_NAMESPACE,
DOCTYPE_AFTER_TAGS,
MALFORMED_DOCTYPE,
UNEXPECTED_END_OF_FILE,
DTYPE_NOT_UPPER_CASE,
TOO_MANY_ELEMENTS,
UNESCAPED_ELEMENT,
NESTED_QUOTATION,
ELEMENT_NOT_EMPTY,
ENCODING_IO_CONFLICT,
MIXED_CONTENT_IN_BLOCK,
MISSING_DOCTYPE,
SPACE_PRECEDING_XMLDECL,
TOO_MANY_ELEMENTS_IN,
UNEXPECTED_ENDTAG_IN,
REPLACING_ELEMENT,
REPLACING_UNEX_ELEMENT,
COERCE_TO_ENDTAG_WARN,
#define BACKSLASH_IN_URI 62 /* error codes used for attribute messages */
#define FIXED_BACKSLASH 63
#define ILLEGAL_URI_REFERENCE 64
#define ESCAPED_ILLEGAL_URI 65
#define NEWLINE_IN_URI 66 UNKNOWN_ATTRIBUTE,
#define ANCHOR_NOT_UNIQUE 67 INSERTING_ATTRIBUTE,
INSERTING_AUTO_ATTRIBUTE,
MISSING_ATTR_VALUE,
BAD_ATTRIBUTE_VALUE,
UNEXPECTED_GT,
PROPRIETARY_ATTRIBUTE,
PROPRIETARY_ATTR_VALUE,
REPEATED_ATTRIBUTE,
MISSING_IMAGEMAP,
XML_ATTRIBUTE_VALUE,
UNEXPECTED_QUOTEMARK,
MISSING_QUOTEMARK,
ID_NAME_MISMATCH,
#define JOINING_ATTRIBUTE 68 BACKSLASH_IN_URI,
#define UNEXPECTED_EQUALSIGN 69 FIXED_BACKSLASH,
#define ATTR_VALUE_NOT_LCASE 70 ILLEGAL_URI_REFERENCE,
#define XML_ID_SYNTAX 71 ESCAPED_ILLEGAL_URI,
#define INVALID_ATTRIBUTE 72 NEWLINE_IN_URI,
ANCHOR_NOT_UNIQUE,
#define BAD_ATTRIBUTE_VALUE_REPLACED 73 JOINING_ATTRIBUTE,
UNEXPECTED_EQUALSIGN,
ATTR_VALUE_NOT_LCASE,
XML_ID_SYNTAX,
#define INVALID_XML_ID 74 INVALID_ATTRIBUTE,
#define UNEXPECTED_END_OF_FILE_ATTR 75
#define MISSING_ATTRIBUTE 86
#define WHITE_IN_URI 87
#define REMOVED_HTML5 88 /* this element removed from HTML5 */ BAD_ATTRIBUTE_VALUE_REPLACED,
#define BAD_BODY_HTML5 89 /* attr on body removed from HTML5 */
#define BAD_ALIGN_HTML5 90 /* use of align attr removed from HTML5 */
#define BAD_SUMMARY_HTML5 91 /* use of summary attr removed from HTML5 */
#define PREVIOUS_LOCATION 92 /* last */ INVALID_XML_ID,
UNEXPECTED_END_OF_FILE_ATTR,
MISSING_ATTRIBUTE,
WHITE_IN_URI,
/* character encoding errors */ REMOVED_HTML5, /* this element removed from HTML5 */
BAD_BODY_HTML5, /* attr on body removed from HTML5 */
BAD_ALIGN_HTML5, /* use of align attr removed from HTML5 */
BAD_SUMMARY_HTML5, /* use of summary attr removed from HTML5 */
#define VENDOR_SPECIFIC_CHARS 76 PREVIOUS_LOCATION, /* last */
#define INVALID_SGML_CHARS 77
#define INVALID_UTF8 78 /* character encoding errors */
#define INVALID_UTF16 79
#define ENCODING_MISMATCH 80 VENDOR_SPECIFIC_CHARS,
#define INVALID_URI 81 INVALID_SGML_CHARS,
#define INVALID_NCR 82 INVALID_UTF8,
INVALID_UTF16,
ENCODING_MISMATCH,
INVALID_URI,
INVALID_NCR,
/* This MUST be present and last. */
CODES_TIDY_ERROR_LAST
} tidyErrorCodes;
/**
 * Miscellaneous message identifiers (tidyMessagesMisc). These are used
 * throughout libtidy, and also have associated localized strings to
 * describe them.
 *
 * The first enumerator is pinned to 2048 and every following enumerator
 * implicitly takes the next integer in declaration order, so inserting or
 * reordering entries changes the numeric value of everything after them.
 * NOTE(review): the 2048 base presumably keeps these identifiers clear of
 * the other message code ranges -- confirm before changing it.
 */
typedef enum {
ACCESS_URL = 2048, /* Used to point to Web Accessibility Guidelines. */
ATRC_ACCESS_URL, /* Points to Tidy's accessibility page. */
FILE_CANT_OPEN, /* For retrieving a string when a file can't be opened. */
LINE_COLUMN_STRING, /* For retrieving localized `line %d column %d` text. */
STRING_CONTENT_LOOKS, /* `Document content looks like %s`. */
STRING_DISCARDING, /* For `discarding`. */
STRING_DOCTYPE_GIVEN, /* `Doctype given is \"%s\"`. */
STRING_ERROR_COUNT, /* `%u %s, %u %s were found!`. */
STRING_ERROR_COUNT_ERROR, /* `error` and `errors`. */
STRING_ERROR_COUNT_WARNING, /* `warning` and `warnings`. */
STRING_HELLO_ACCESS, /* Accessibility hello message. */
STRING_HTML_PROPRIETARY, /* `HTML Proprietary`. */
STRING_MISSING_MALFORMED, /* For `missing or malformed argument for option: %s`. */
STRING_NO_ERRORS, /* `No warnings or errors were found.\n\n`. */
STRING_NO_SYSID, /* `No system identifier in emitted doctype`. */
STRING_NOT_ALL_SHOWN, /* ` Not all warnings/errors were shown.\n\n`. */
STRING_PLAIN_TEXT, /* For retrieving a string `plain text`. */
STRING_REPLACING, /* For `replacing`. */
STRING_SPECIFIED, /* For `specified`. */
STRING_UNKNOWN_FILE, /* `%s: can't open file \"%s\"\n`. */
STRING_UNKNOWN_OPTION, /* For retrieving a string `unknown option: %s`. */
STRING_UNRECZD_OPTION, /* `unrecognized option -%c use -help to list options\n`. */
STRING_XML_DECLARATION, /* For retrieving a string `XML declaration`. */
TEXT_ACCESS_ADVICE1, /* Explanatory text. */
TEXT_ACCESS_ADVICE2, /* Explanatory text. */
TEXT_BAD_FORM, /* Explanatory text. */
TEXT_BAD_MAIN, /* Explanatory text. */
TEXT_GENERAL_INFO, /* Explanatory text. */
TEXT_GENERAL_INFO_PLEA, /* Explanatory text. */
TEXT_HTML_T_ALGORITHM, /* Paragraph for describing the HTML table algorithm. */
TEXT_INVALID_URI, /* Explanatory text. */
TEXT_INVALID_UTF16, /* Explanatory text. */
TEXT_INVALID_UTF8, /* Explanatory text. */
TEXT_M_IMAGE_ALT, /* Explanatory text. */
TEXT_M_IMAGE_MAP, /* Explanatory text. */
TEXT_M_LINK_ALT, /* Explanatory text. */
TEXT_M_SUMMARY, /* Explanatory text. */
TEXT_NEEDS_INTERVENTION, /* Explanatory text. */
TEXT_SGML_CHARS, /* Explanatory text. */
TEXT_USING_BODY, /* Explanatory text. */
TEXT_USING_FONT, /* Explanatory text. */
TEXT_USING_FRAMES, /* Explanatory text. */
TEXT_USING_LAYER, /* Explanatory text. */
TEXT_USING_NOBR, /* Explanatory text. */
TEXT_USING_SPACER, /* Explanatory text. */
TEXT_VENDOR_CHARS, /* Explanatory text. */
TEXT_WINDOWS_CHARS /* Explanatory text. */
} tidyMessagesMisc;
/* accessibility flaws */ /* accessibility flaws */
@ -191,9 +261,6 @@ void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code);
#define USING_FONT 8 #define USING_FONT 8
#define USING_BODY 16 #define USING_BODY 16
#define REPLACED_CHAR 0
#define DISCARDED_CHAR 1
/* badchar bit field */ /* badchar bit field */
#define BC_VENDOR_SPECIFIC_CHARS 1 #define BC_VENDOR_SPECIFIC_CHARS 1
@ -204,4 +271,10 @@ void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code);
#define BC_INVALID_URI 32 #define BC_INVALID_URI 32
#define BC_INVALID_NCR 64 #define BC_INVALID_NCR 64
/* Lexer and I/O Macros */
#define REPLACED_CHAR 0
#define DISCARDED_CHAR 1
#endif /* __MESSAGE_H__ */ #endif /* __MESSAGE_H__ */

View file

@ -58,6 +58,7 @@ struct _TidyDocImpl
StreamOut* errout; StreamOut* errout;
TidyReportFilter mssgFilt; TidyReportFilter mssgFilt;
TidyReportFilter2 mssgFilt2; TidyReportFilter2 mssgFilt2;
TidyReportFilter3 mssgFilt3;
TidyOptCallback pOptCallback; TidyOptCallback pOptCallback;
TidyPPProgress progressCallback; TidyPPProgress progressCallback;

View file

@ -516,8 +516,7 @@ ctmbstr TIDY_CALL tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId,
ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt ) ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
{ {
const TidyOptionId optId = tidyOptGetId( opt ); const TidyOptionId optId = tidyOptGetId( opt );
const TidyOptionDoc* docDesc = TY_(OptGetDocDesc)( optId ); return tidyLocalizedString(optId);
return docDesc ? docDesc->doc : NULL;
} }
TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt ) TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
@ -657,6 +656,11 @@ Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt )
return no; return no;
} }
/* TidyReportFilter2 functions similarly to TidyReportFilter, but provides the
** built-in English format string and va_list so that LibTidy users can use
** the format string as a lookup key for providing their own error
** localizations.
*/
Bool TIDY_CALL tidySetReportFilter2( TidyDoc tdoc, TidyReportFilter2 filt ) Bool TIDY_CALL tidySetReportFilter2( TidyDoc tdoc, TidyReportFilter2 filt )
{ {
TidyDocImpl* impl = tidyDocToImpl( tdoc ); TidyDocImpl* impl = tidyDocToImpl( tdoc );
@ -668,6 +672,22 @@ Bool TIDY_CALL tidySetReportFilter2( TidyDoc tdoc, TidyReportFilter2 filt
return no; return no;
} }
/* Installs a TidyReportFilter3 callback on the given document.
** TidyReportFilter3 works like TidyReportFilter, except that it provides
** the string version of the internal enum name, so that LibTidy users can
** use that string as a lookup key for their own error localizations.
** See the string definitions in language.h.
**
** Returns yes when the filter was stored on the document, or no when
** tdoc does not map to a document implementation.
*/
Bool TIDY_CALL tidySetReportFilter3( TidyDoc tdoc, TidyReportFilter3 filt )
{
    TidyDocImpl* doc = tidyDocToImpl( tdoc );

    /* Nothing to attach the filter to. */
    if ( !doc )
        return no;

    doc->mssgFilt3 = filt;
    return yes;
}
#if 0 /* Not yet */ #if 0 /* Not yet */
int tidySetContentOutputSink( TidyDoc tdoc, TidyOutputSink* outp ) int tidySetContentOutputSink( TidyDoc tdoc, TidyOutputSink* outp )
{ {

View file

@ -264,6 +264,7 @@ Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 )
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args) int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
{ {
int retval; int retval;
#if HAS_VSNPRINTF #if HAS_VSNPRINTF
retval = vsnprintf(buffer, count - 1, format, args); retval = vsnprintf(buffer, count - 1, format, args);
/* todo: conditionally null-terminate the string? */ /* todo: conditionally null-terminate the string? */
@ -279,13 +280,7 @@ int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
int retval; int retval;
va_list args; va_list args;
va_start(args, format); va_start(args, format);
#if HAS_VSNPRINTF retval = TY_(tmbvsnprintf)(buffer, count, format, args);
retval = vsnprintf(buffer, count - 1, format, args);
/* todo: conditionally null-terminate the string? */
buffer[count - 1] = 0;
#else
retval = vsprintf(buffer, format, args);
#endif /* HAS_VSNPRINTF */
va_end(args); va_end(args);
return retval; return retval;
} }

View file

@ -1,2 +1,2 @@
5.1.33 5.1.34
2016.01.07 2016.01.30