Add new `rebase` command to CLI.

This is intended to make it very, very easy to update the POT and all of the POs when
changes are made to `language_en.h`. Used without a SHA-1 hash, untranslated strings
(i.e., the "source" strings) are updated in the POT/PO's.

However, if you specify a --sha=HASH (or -c HASH) option, then the script will use git
to examine the `language_en.h` file from that specified commit, determine the strings
that have changed, and mark all of these strings as `fuzzy` in the POs. This will serve
as a flag to translators that the original has changed. In addition, this `fuzzy` flag
will appear in the headers as "(fuzzy) " in the item comments.

If a translator edits the header directly, he or she should remove the "(fuzzy) " in the
comment. Then when the PO is rebuilt, the fuzzy flag will be removed automatically.
The reverse is also true; if a translator is working with the PO, he or she should
clear the fuzzy flag and the comment will be adjusted accordingly in the generated
header.
This commit is contained in:
Jim Derry 2016-03-24 11:38:04 +08:00
parent ad7bdee3b9
commit 7d2ddee775
7 changed files with 164 additions and 76 deletions

View File

@ -2,3 +2,4 @@ source 'https://rubygems.org'
gem 'thor'
gem 'i18n'
gem 'git'

View File

@ -14,6 +14,7 @@ require 'erb' # Needed for templating.
require 'thor' # thor provides robust command line parameter parsing.
require 'i18n' # Cross-platform access to `locale`.
require 'digest' # For computing checksums.
require 'git' # For working with old versions of files.
################################################################################
@ -155,11 +156,7 @@ module PoConvertModule
#########################################################
# parse_po_section( content )
# Parses a single PO section. Note that will will still
# parse and accept ##BLAH## as #if groups; they just
# won't be used. We'll get the information live from
# the English header instead of trying to store meta-
# data in the PO/POT.
# Parses a single PO section.
#########################################################
def parse_po_section( content )
@ -167,12 +164,12 @@ module PoConvertModule
# if we want to capture more PO stuff in the future.
map = [
[ :START, :COMMENT, :SET_COMMENT, :START ],
[ :START, :IFGROUP, :SET_GROUP, :START ],
[ :START, :FLAG, :SET_FLAG, :START ],
[ :START, :NEW_ITEM, :SET_INIT, :CONTINUE ],
[ :START, :OTHER, :NOOP, :START ],
[ :START, :EMPTY, :NOOP, :START ],
[ :CONTINUE, :COMMENT, :ERROR, nil ],
[ :CONTINUE, :IFGROUP, :ERROR, nil ],
[ :CONTINUE, :FLAG, :ERROR, nil ],
[ :CONTINUE, :NEW_ITEM, :SET_FINAL, :CONTINUE ],
[ :CONTINUE, :EMPTY, :SET_FINAL, :START ],
[ :CONTINUE, :OTHER, :ADD_TO, :CONTINUE ],
@ -180,7 +177,7 @@ module PoConvertModule
current_label = nil
current_comment = nil
current_if_group = nil
current_flag = nil
current_cases = {} # 'case' => string
state = :START
buffer = ''
@ -193,7 +190,7 @@ module PoConvertModule
input = :OTHER
input = :EMPTY if line == "\n"
input = :COMMENT if line.start_with?('#.')
input = :IFGROUP if line[/^#\..*##.*##/]
input = :FLAG if line.start_with?('#,')
input = :NEW_ITEM if line.start_with?('msgctxt', 'msgid', 'msgstr')
# Find our current state-input pair
@ -222,8 +219,8 @@ module PoConvertModule
item = line[/^(.*?)\s/, 1]
when :SET_COMMENT
current_comment = line.match(/#\.\s*(.*?)$/)[1]
when :SET_GROUP
current_if_group = line.match(/##(.*)##/)[1]
when :SET_FLAG
current_flag = line.match(/#\,\s*(.*?)$/)[1]
when :ERROR
@@log.error "#{__method__}: Could NOT parse part of the PO file. Aborting!\n"
@@log.error "#{__method__}: Last known label was \"#{current_label}\".\n"
@ -240,7 +237,7 @@ module PoConvertModule
# We have some nice local vars but let's put these into a hash
# just like PoHeader file uses:
# :keyword => { '#' => { :comment, :if_group, :case, :string } }
# :keyword => { '#' => { :comment, :fuzzy, :case, :string } }
# We will also reject items that have no string value.
result = {}
if current_label
@ -248,9 +245,11 @@ module PoConvertModule
result[current_label] = {}
current_cases.each do | key, value |
unless value == ''
fuzzy = ( current_flag =~ /fuzzy/i ) != nil
result[current_label][key] = {}
result[current_label][key][:comment] = current_comment
result[current_label][key][:if_group] = current_if_group
result[current_label][key][:comment] = fuzzy ? "(fuzzy) #{current_comment}" : current_comment
result[current_label][key][:fuzzy] = fuzzy
result[current_label][key][:if_group] = nil
result[current_label][key][:case] = key
result[current_label][key][:string] = value
end
@ -362,8 +361,8 @@ module PoConvertModule
l_key = key.to_sym
self.items[l_key] = {} unless items.has_key?(l_key)
self.items[l_key][num_case] = {}
self.items[l_key][num_case][:comment] = comment
self.items[l_key][num_case][:fuzzy] = ( comment =~ /\(fuzzy\)/i ) != nil
self.items[l_key][num_case][:comment] = comment ? comment.sub( /\(fuzzy\) /i, '') : nil
self.items[l_key][num_case][:fuzzy] = ( comment =~ /\(fuzzy\) /i ) != nil
self.items[l_key][num_case][:case] = num_case
self.items[l_key][num_case][:if_group] = nil
# Reconstitute Hex Escapes
@ -418,7 +417,7 @@ module PoConvertModule
@po_locale = nil # The locale to use to generate PO files.
@known_locales = {} # The locales we know about.
@emacs_footer = false # Indicates whether or not to add emacs instructions.
@plaintext = false # Indicates whether or not we should stick to plaintext.
@plaintext = true # Indicates whether or not we should stick to plaintext.
@force_comments = false # Force comments into non-English header files?
end
@ -571,11 +570,11 @@ module PoConvertModule
#########################################################
# convert_to_po( source_file_h, base_file )
# convert_to_po( source_file_h, base_file, fuzzy_list )
# Perform the conversion for xgettext, msginit, and
# msgunfmt.
#########################################################
def convert_to_po( source_file_h = nil, base_file = nil )
def convert_to_po( source_file_h = nil, base_file = nil, fuzzy_list = nil )
return false unless english_header?
# What we actually do depends on what was setup for us.
@ -586,14 +585,16 @@ module PoConvertModule
action = :msginit if source_file_h.nil? && po_locale
action = :xgettext if source_file_h.nil? && po_locale.nil?
# Untranslated Items form the basis of all output. For convenience
# we can use some non-English strings as an "untranslated" string,
# e.g., to help translate regional formats.
# lang_en serves as the master reference for all output, especially
# comments and metadata.
lang_en = PoHeaderFile.new(@@default_en)
return false unless lang_en.source_file
# untranslated_items serves as the source for *untranslated* strings.
# This differs from lang_en in that we may overwrite some of the
# lang_en strings from the base_file, later. This can help when
# translating, e.g., regional formats.
untranslated_items = lang_en.items.clone
if base_file
lang_base = PoHeaderFile.new(base_file)
return false unless lang_base.source_file
@ -601,7 +602,7 @@ module PoConvertModule
end
# We will use lang_source if we have a source_file_h, i.e., msgunfmt,
# as the source for translated strings.
# as the source for *translated* strings.
if source_file_h
lang_source = PoHeaderFile.new(source_file_h)
return false unless lang_source.source_file
@ -609,6 +610,20 @@ module PoConvertModule
lang_source = nil
end
# If we were given a fuzzy_list and we have a source_file, then
# we have to mark appropriate items as fuzzy.
if fuzzy_list && fuzzy_list.count > 0 && lang_source
untranslated_items.each do |key, value|
if fuzzy_list.include?(key)
value.each_value do |v|
v[:fuzzy] = true
end
end
end
end
# The information in the PO header can come from a few different sources
# depending on what we're doing.
header_plural_forms = nil
@ -658,7 +673,7 @@ msgstr ""
"#{header_pot_line}\\n"
"Last-Translator: #{ENV['USER']}#{ENV['USERNAME']}\\n"
"Language-Team: \\n"
"BAD"
HEREDOC
untranslated_items.delete(:TIDY_LANGUAGE)
@ -670,7 +685,7 @@ msgstr ""
end
attribs = []
attribs << 'fuzzy' if value['0'][:fuzzy]
attribs << 'fuzzy' if value['0'][:fuzzy] && action == :msgunfmt
attribs << 'c-format' if %w(%u %s %d).any? { | find | value['0'][:string].include?(find) }
if attribs.count > 0
report << "#, #{attribs.join(', ')}\n"
@ -784,10 +799,13 @@ msgstr ""
# Additionally we will only use comments from language_en.h. Besides
# preventing us from having to format them, we ensure that only the
# canonical comments are put into the H file in the event of changes.
# Additionally only include comments if enabled.
# Finally add fuzzy notes to comments if the PO item is fuzzy.
po_content.items.each do |key, value|
value.each_value do |item_entry|
item_entry[:if_group] = lang_en.items[key]['0'][:if_group]
item_entry[:comment] = force_comments ? lang_en.items[key]['0'][:comment] : nil
item_entry[:comment] = "(fuzzy) #{item_entry[:comment]}" if item_entry[:fuzzy]
end
end
@ -1130,6 +1148,110 @@ Complete Help:
end # msgfmt
#########################################################
# rebase
# See long_desc
#########################################################
option :sha,
:type =>:string,
:desc => 'Specify the hash against which to check for changed strings.',
:aliases => '-c'
desc 'rebase [--sha=HASH]', 'Creates fresh POT, POs, and headers after updates to language_en.h.'
long_desc <<-LONG_DESC
After changing strings in language_en.h, this command will generate a fresh POT
template, as well as regenerate POs for each language in src/. Finally, it will
regenerate the language header files for each of the new PO files. Items that
have changed in English will be appropriately marked as fuzzy in the PO files.
Source files will *not* be overwritten. All generated files will be placed into
the working directory. Please review them before committing them to source.
If you specify the SHA-1 checksum of the commit for comparison purposes, then
this command identifies fuzzy items by comparing language_en.h with a previous
version as identified by the SHA-1.
Use case: If you change language_en.h, this handy command updates everything
else nearly automatically.
LONG_DESC
def rebase()
  # Thor command: regenerate the POT, then a PO and a header for every
  # language_*.h (other than language_en.h) found in the project's src/
  # directory. When --sha is given, keys whose English string differs from
  # the version of src/language_en.h at that commit are passed to
  # convert_to_po as the fuzzy list.
  #
  # Prints a summary line and calls `exit 1` if any conversion step
  # reported failure.
  error_count = 0
  fuzzy_list = nil

  # The project root is the parent of the working directory. It is needed
  # to locate src/ whether or not --sha was given, so it must be computed
  # unconditionally (previously it was nil when --sha was omitted, which
  # made the File.join calls below raise a TypeError).
  pwd = File.expand_path( File.join(Dir.getwd, '..') )

  if options[:sha]
    sha = options[:sha]
    temp_file = "~#{sha}.h"
    project = Git.open(pwd)

    # We'll get the old version of the file from the specified commit,
    # and then write it to a temporary file. Then we can parse both
    # this temporary file as well as the current version of the file
    # and detect the differences. The ensure clause guarantees that the
    # temporary file does not linger if writing or parsing fails.
    begin
      File.open( temp_file, 'w') { |f| f.write( project.show(sha, File.join('src', 'language_en.h')) ) }
      header_old = PoHeaderFile.new(temp_file)
      header_new = PoHeaderFile.new(@@default_en)
    ensure
      File.delete( temp_file ) if File.exist?( temp_file )
    end

    # Compare each item in the current version with the value, if any,
    # in the previous version in order to build a list of fuzzy stuff.
    # A key (or one of its plural cases) that did not exist in the old
    # version yields a nil old value and is therefore treated as changed.
    fuzzy_list = []
    header_new.items.each do |key, value|
      old_entry = header_old.items.include?(key) ? header_old.items[key] : nil
      value.each do |plural_key, plural_value|
        new_value = plural_value[:string]
        old_case = old_entry ? old_entry[plural_key] : nil
        old_value = old_case ? old_case[:string] : nil
        fuzzy_list << key unless old_value == new_value
      end
    end
    fuzzy_list.uniq!
  end

  # We're ready to generate the POT, which requires nothing special.
  converter = PoConverter.new
  unless converter.convert_to_po( nil, nil)
    error_count += 1
    puts 'There was an issue generating the POT. Will continue anyway.'
  end

  # Build a list of header files. Keep this list instead of counting
  # on reading the working directory later.
  header_list = nil
  Dir.chdir(File.join(pwd, 'src')) do
    header_list = Dir.glob('language_*.h')
  end
  header_list.delete('language_en.h')

  # Building the POs is straight forward.
  header_list.each do |input_file|
    filename = File.join(pwd, 'src', input_file)
    converter = PoConverter.new
    error_count += 1 unless converter.convert_to_po( filename, nil, fuzzy_list )
  end

  # Building the Headers is straight forward, too. Each PO is looked up by
  # bare file name, i.e., relative to the current working directory.
  header_list.each do |input_file|
    filename = "#{File.basename(input_file, '.*')}.po"
    converter = PoConverter.new
    error_count += 1 unless converter.convert_to_h( filename, nil )
  end

  if error_count == 0
    puts 'rebase exited without errors.'
  else
    puts "rebase exited with errors #{error_count} time(s). Consider using the --verbose or --debug options."
    exit 1
  end
end # rebase
#########################################################
# set_options
# Handles command line options.

View File

@ -5,10 +5,10 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n"
"POT-Creation-Date: 2016-03-24 10:59:07\n"
"POT-Creation-Date: 2016-03-24 11:36:17\n"
"Last-Translator: jderry\n"
"Language-Team: \n"
"BAD"
#. Only translate if a URL to the target language can be found.
msgctxt "ACCESS_URL"
msgid "http://www.w3.org/WAI/GL"

View File

@ -28,7 +28,7 @@
*
* Orginating PO file metadata:
* PO_LAST_TRANSLATOR=jderry
* PO_REVISION_DATE=2016-02-17 20:04:18
* PO_REVISION_DATE=2016-03-24 10:59:55
*/
#ifdef _MSC_VER
@ -65,19 +65,14 @@ static languageDefinition language_en_gb = { whichPluralForm_en_gb, {
{/* Specify the ll or ll_cc language code here. */
TIDY_LANGUAGE, 0, "en_gb"
},
{/* This console output should be limited to 78 characters per line. */
TEXT_USING_FONT, 0,
{ TEXT_USING_FONT, 0,
"You are recommended to use CSS to specify the font and\n"
"properties such as its size and colour. This will reduce\n"
"the size of HTML files and make them easier to maintain\n"
"compared with using <FONT> elements.\n\n"
},
{/* This console output should be limited to 78 characters per line. */
TEXT_USING_BODY, 0, "You are recommended to use CSS to specify page and link colours\n"
},
{/* This console output should be limited to 78 characters per line.
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TEXT_GENERAL_INFO_PLEA, 0,
{ TEXT_USING_BODY, 0, "You are recommended to use CSS to specify page and link colours\n" },
{ TEXT_GENERAL_INFO_PLEA, 0,
"\n"
"Would you like to see Tidy in proper, British English? Please consider \n"
"helping us to localise HTML Tidy. For details please see \n"
@ -96,15 +91,7 @@ static languageDefinition language_en_gb = { whichPluralForm_en_gb, {
{ COLOR_CONTRAST_VISITED_LINK, 0, "[2.2.1.4]: poor colour contrast (visited link)." },
#endif /* SUPPORT_ACCESSIBILITY_CHECKS */
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
<br/>.
- Entities, tags, attributes, etc., should be enclosed in <code></code>.
- Option values should be enclosed in <var></var>.
- It's very important that <br/> be self-closing!
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
TidyMergeDivs, 0,
{ TidyMergeDivs, 0,
"This option can be used to modify the behaviour of <code>clean</code> when "
"set to <code>yes</code>."
"<br/>"
@ -120,15 +107,7 @@ static languageDefinition language_en_gb = { whichPluralForm_en_gb, {
"<code>&lt;div&gt;</code> are discarded with the exception of "
"<code>class</code> and <code>style</code>. "
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
<br/>.
- Entities, tags, attributes, etc., should be enclosed in <code></code>.
- Option values should be enclosed in <var></var>.
- It's very important that <br/> be self-closing!
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
TidyMergeSpans, 0,
{ TidyMergeSpans, 0,
"This option can be used to modify the behaviour of <code>clean</code> when "
"set to <code>yes</code>."
"<br/>"
@ -137,15 +116,7 @@ static languageDefinition language_en_gb = { whichPluralForm_en_gb, {
"<br/>"
"The algorithm is identical to the one used by <code>merge-divs</code>. "
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
<br/>.
- Entities, tags, attributes, etc., should be enclosed in <code></code>.
- Option values should be enclosed in <var></var>.
- It's very important that <br/> be self-closing!
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
TidyReplaceColor, 0,
{ TidyReplaceColor, 0,
"This option specifies if Tidy should replace numeric values in colour "
"attributes with HTML/XHTML colour names where defined, e.g. replace "
"<code>#ffffff</code> with <code>white</code>. "

View File

@ -28,7 +28,7 @@
*
* Orginating PO file metadata:
* PO_LAST_TRANSLATOR=jderry
* PO_REVISION_DATE=2016-03-23 14:49:53
* PO_REVISION_DATE=2016-03-24 10:59:55
*/
#ifdef _MSC_VER

View File

@ -28,7 +28,7 @@
*
* Orginating PO file metadata:
* PO_LAST_TRANSLATOR=jderry
* PO_REVISION_DATE=2016-02-17 20:04:18
* PO_REVISION_DATE=2016-03-24 10:59:55
*/
#ifdef _MSC_VER
@ -65,9 +65,7 @@ static languageDefinition language_es_mx = { whichPluralForm_es_mx, {
{/* Specify the ll or ll_cc language code here. */
TIDY_LANGUAGE, 0, "es_mx"
},
{/* This console output should be limited to 78 characters per line.
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TEXT_GENERAL_INFO_PLEA, 0,
{ TEXT_GENERAL_INFO_PLEA, 0,
"\n"
"¿Le gustaría ver Tidy en adecuada, español mexicano? Por favor considere \n"
"ayudarnos a localizar HTML Tidy. Para más detalles consulte \n"

View File

@ -28,7 +28,7 @@
*
* Orginating PO file metadata:
* PO_LAST_TRANSLATOR=jderry
* PO_REVISION_DATE=2016-02-17 20:04:18
* PO_REVISION_DATE=2016-03-24 10:59:55
*/
#ifdef _MSC_VER
@ -68,12 +68,8 @@ static languageDefinition language_zh_cn = { whichPluralForm_zh_cn, {
{ FILE_CANT_OPEN, 0, "无法打开”%s”\n" },
{ LINE_COLUMN_STRING, 0, "行 %d 列 %d - " },
{ STRING_CONTENT_LOOKS, 0, "文档内容看起来像 %s" },
{/* The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TC_STRING_VERS_A, 0, "HTML Tidy 用于 %s 版本 %s"
},
{/* The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TC_STRING_VERS_B, 0, "HTML Tidy 版本 %s"
},
{ TC_STRING_VERS_A, 0, "HTML Tidy 用于 %s 版本 %s" },
{ TC_STRING_VERS_B, 0, "HTML Tidy 版本 %s" },
{/* This MUST be present and last. */
TIDY_MESSAGE_TYPE_LAST, 0, NULL