From 7d2ddee775368607e4b9d2a1cff89feecc27f41c Mon Sep 17 00:00:00 2001 From: Jim Derry Date: Thu, 24 Mar 2016 11:38:04 +0800 Subject: [PATCH] Add new `rebase` command to CLI. This is intended to make it very, very easy to update the POT and all of the POs when changes are made to `language_en.h`. Used without a SHA-1 hash, untranslated strings (i.e., the "source" strings) are updated in the POT/POs. However, if you specify an --sha=HASH (or -c HASH) option, then the script will use git to examine the `language_en.h` file from that specified commit, determine the strings that have changed, and mark all of these strings as `fuzzy` in the POs. This will serve as a flag to translators that the original has changed. In addition, this `fuzzy` flag will appear in the headers as "(fuzzy) " in the item comments. If a translator edits the header directly, he or she should remove the "(fuzzy) " in the comment. Then when the PO is rebuilt, the fuzzy flag will be removed automatically. The reverse is also true; if a translator is working with the PO, he or she should clear the fuzzy flag and the comment will be adjusted accordingly in the generated header. --- localize/Gemfile | 1 + localize/poconvert.rb | 174 ++++++++++++++++++++++++++++----- localize/translations/tidy.pot | 4 +- src/language_en_gb.h | 43 ++------ src/language_es.h | 2 +- src/language_es_mx.h | 6 +- src/language_zh_cn.h | 10 +- 7 files changed, 164 insertions(+), 76 deletions(-) diff --git a/localize/Gemfile b/localize/Gemfile index 2ad212f..3ee8fa2 100644 --- a/localize/Gemfile +++ b/localize/Gemfile @@ -2,3 +2,4 @@ source 'https://rubygems.org' gem 'thor' gem 'i18n' +gem 'git' diff --git a/localize/poconvert.rb b/localize/poconvert.rb index 8a047df..3686c6d 100755 --- a/localize/poconvert.rb +++ b/localize/poconvert.rb @@ -14,6 +14,7 @@ require 'erb' # Needed for templating. require 'thor' # thor provides robust command line parameter parsing. require 'i18n' # Cross-platform access to `locale`. 
require 'digest' # For computing checksums. +require 'git' # For working with old versions of files. ################################################################################ @@ -155,11 +156,7 @@ module PoConvertModule ######################################################### # parse_po_section( content ) - # Parses a single PO section. Note that will will still - # parse and accept ##BLAH## as #if groups; they just - # won't be used. We'll get the information live from - # the English header instead of trying to store meta- - # data in the PO/POT. + # Parses a single PO section. ######################################################### def parse_po_section( content ) @@ -167,12 +164,12 @@ module PoConvertModule # if we want to capture more PO stuff in the future. map = [ [ :START, :COMMENT, :SET_COMMENT, :START ], - [ :START, :IFGROUP, :SET_GROUP, :START ], + [ :START, :FLAG, :SET_FLAG, :START ], [ :START, :NEW_ITEM, :SET_INIT, :CONTINUE ], [ :START, :OTHER, :NOOP, :START ], [ :START, :EMPTY, :NOOP, :START ], [ :CONTINUE, :COMMENT, :ERROR, nil ], - [ :CONTINUE, :IFGROUP, :ERROR, nil ], + [ :CONTINUE, :FLAG, :ERROR, nil ], [ :CONTINUE, :NEW_ITEM, :SET_FINAL, :CONTINUE ], [ :CONTINUE, :EMPTY, :SET_FINAL, :START ], [ :CONTINUE, :OTHER, :ADD_TO, :CONTINUE ], @@ -180,7 +177,7 @@ module PoConvertModule current_label = nil current_comment = nil - current_if_group = nil + current_flag = nil current_cases = {} # 'case' => string state = :START buffer = '' @@ -193,7 +190,7 @@ module PoConvertModule input = :OTHER input = :EMPTY if line == "\n" input = :COMMENT if line.start_with?('#.') - input = :IFGROUP if line[/^#\..*##.*##/] + input = :FLAG if line.start_with?('#,') input = :NEW_ITEM if line.start_with?('msgctxt', 'msgid', 'msgstr') # Find our current state-input pair @@ -222,8 +219,8 @@ module PoConvertModule item = line[/^(.*?)\s/, 1] when :SET_COMMENT current_comment = line.match(/#\.\s*(.*?)$/)[1] - when :SET_GROUP - current_if_group = 
line.match(/##(.*)##/)[1] + when :SET_FLAG + current_flag = line.match(/#\,\s*(.*?)$/)[1] when :ERROR @@log.error "#{__method__}: Could NOT parse part of the PO file. Aborting!\n" @@log.error "#{__method__}: Last known label was \"#{current_label}\".\n" @@ -240,7 +237,7 @@ module PoConvertModule # We have some nice local vars but let's put these into a hash # just like PoHeader file uses: - # :keyword => { '#' => { :comment, :if_group, :case, :string } } + # :keyword => { '#' => { :comment, :fuzzy, :case, :string } } # We will also reject items that have no string value. result = {} if current_label @@ -248,9 +245,11 @@ module PoConvertModule result[current_label] = {} current_cases.each do | key, value | unless value == '' + fuzzy = ( current_flag =~ /fuzzy/i ) != nil result[current_label][key] = {} - result[current_label][key][:comment] = current_comment - result[current_label][key][:if_group] = current_if_group + result[current_label][key][:comment] = fuzzy ? "(fuzzy) #{current_comment}" : current_comment + result[current_label][key][:fuzzy] = fuzzy + result[current_label][key][:if_group] = nil result[current_label][key][:case] = key result[current_label][key][:string] = value end @@ -362,8 +361,8 @@ module PoConvertModule l_key = key.to_sym self.items[l_key] = {} unless items.has_key?(l_key) self.items[l_key][num_case] = {} - self.items[l_key][num_case][:comment] = comment - self.items[l_key][num_case][:fuzzy] = ( comment =~ /\(fuzzy\)/i ) != nil + self.items[l_key][num_case][:comment] = comment ? comment.sub( /\(fuzzy\) /i, '') : nil + self.items[l_key][num_case][:fuzzy] = ( comment =~ /\(fuzzy\) /i ) != nil self.items[l_key][num_case][:case] = num_case self.items[l_key][num_case][:if_group] = nil # Reconstitute Hex Escapes @@ -418,7 +417,7 @@ module PoConvertModule @po_locale = nil # The locale to use to generate PO files. @known_locales = {} # The locales we know about. @emacs_footer = false # Indicates whether or not to add emacs instructions. 
- @plaintext = false # Indicates whether or not we should stick to plaintext. + @plaintext = true # Indicates whether or not we should stick to plaintext. @force_comments = false # Force comments into non-English header files? end @@ -571,11 +570,11 @@ module PoConvertModule ######################################################### - # convert_to_po( source_file_h, base_file ) + # convert_to_po( source_file_h, base_file, fuzzy_list ) # Perform the conversion for xgettext, msginit, and # msgunfmt. ######################################################### - def convert_to_po( source_file_h = nil, base_file = nil ) + def convert_to_po( source_file_h = nil, base_file = nil, fuzzy_list = nil ) return false unless english_header? # What we actually do depends on what was setup for us. @@ -586,14 +585,16 @@ module PoConvertModule action = :msginit if source_file_h.nil? && po_locale action = :xgettext if source_file_h.nil? && po_locale.nil? - # Untranslated Items form the basis of all output. For convenience - # we can use some non-English strings as an "untranslated" string, - # e.g., to help translate regional formats. + # lang_en serves as the master reference for all output, especially + # comments and metadata. lang_en = PoHeaderFile.new(@@default_en) return false unless lang_en.source_file + # untranslated_items serves as the source for *untranslated* strings. + # This differs from lang_en in that we may overwrite some of the + # lang_en strings from the base_file, later. This can help when + # translating, e.g., regional formats. untranslated_items = lang_en.items.clone - if base_file lang_base = PoHeaderFile.new(base_file) return false unless lang_base.source_file @@ -601,7 +602,7 @@ module PoConvertModule end # We will use lang_source if we have a source_file_h, i.e., msgunfmt, - # as the source for translated strings. + # as the source for *translated* strings. 
if source_file_h lang_source = PoHeaderFile.new(source_file_h) return false unless lang_source.source_file @@ -609,6 +610,20 @@ module PoConvertModule lang_source = nil end + + # If we were given a fuzzy_list and we have a source_file, then + # we have to mark appropriate items as fuzzy. + if fuzzy_list && fuzzy_list.count > 0 && lang_source + untranslated_items.each do |key, value| + if fuzzy_list.include?(key) + value.each_value do |v| + v[:fuzzy] = true + end + end + + end + end + # The information in the PO header can come from a few different sources # depending on what we're doing. header_plural_forms = nil @@ -658,7 +673,7 @@ msgstr "" "#{header_pot_line}\\n" "Last-Translator: #{ENV['USER']}#{ENV['USERNAME']}\\n" "Language-Team: \\n" -"BAD" + HEREDOC untranslated_items.delete(:TIDY_LANGUAGE) @@ -670,7 +685,7 @@ msgstr "" end attribs = [] - attribs << 'fuzzy' if value['0'][:fuzzy] + attribs << 'fuzzy' if value['0'][:fuzzy] && action == :msgunfmt attribs << 'c-format' if %w(%u %s %d).any? { | find | value['0'][:string].include?(find) } if attribs.count > 0 report << "#, #{attribs.join(', ')}\n" @@ -784,10 +799,13 @@ msgstr "" # Additionally we will only use comments from language_en.h. Besides # preventing us from having to format them, we ensure that only the # canonical comments are put into the H file in the event of changes. + # Additionally only include comments if enabled. + # Finally add fuzzy notes to comments if the PO item is fuzzy. po_content.items.each do |key, value| value.each_value do |item_entry| item_entry[:if_group] = lang_en.items[key]['0'][:if_group] item_entry[:comment] = force_comments ? 
lang_en.items[key]['0'][:comment] : nil + item_entry[:comment] = "(fuzzy) #{item_entry[:comment]}" if item_entry[:fuzzy] end end @@ -1130,6 +1148,110 @@ Complete Help: end # msgfmt + ######################################################### + # rebase + # See long_desc + ######################################################### + option :sha, + :type =>:string, + :desc => 'Specify the hash against which to check for changed strings.', + :aliases => '-c' + desc 'rebase [--sha=HASH]', 'Creates fresh POT, POs, and headers after updates to language_en.h.' + long_desc <<-LONG_DESC + After changing strings in language_en.h, this command will generate a fresh POT + template, as well as regenerate POs for each language in src/. Finally, it will + regenerate the language header files for each of the new PO files. Items that + have changed in English will be appropriately marked as fuzzy in the PO files. + + Source files will *not* be overwritten. All generated files will be placed into + the working directory. Please review them before committing them to source. + + If you specify the SHA-1 checksum of the commit for comparison purposes, then + this command identifies fuzzy items by comparing language_en.h with a previous + version as identified by the SHA-1. + + Use case: If you change language_en.h, this handy command updates everything + else nearly automatically. + LONG_DESC + def rebase() + error_count = 0 + fuzzy_list = nil + + if options[:sha] + pwd = File.expand_path( File.join(Dir.getwd, '..') ) + sha = options[:sha] + temp_file = "~#{sha}.h" + project = Git.open(pwd) + + # We'll get the old version of the file from the specified commit, + # and then write it to a temporary file. Then we can parse both + # this temporary file as well as the current version of the file + # and detect the differences. 
+ File.open( temp_file, 'w') { |f| f.write( project.show(sha, File.join('src', 'language_en.h')) ) } + header_old = PoHeaderFile.new(temp_file) + header_new = PoHeaderFile.new(@@default_en) + File.delete( temp_file ) + + + # Compare each item in the current version with the value, if any, + # in the previous version in order to build a list of fuzzy stuff. + fuzzy_list = [] + header_new.items.each do |key, value| + value.each do |plural_key, plural_value| + new_value = plural_value[:string] + old_value = header_old.items.include?(key) ? header_old.items[key][plural_key][:string] : nil + unless old_value == new_value + fuzzy_list << key + end + + end + end + fuzzy_list.uniq! + end + + + # We're ready to generate the POT, which requires nothing special. + converter = PoConverter.new + unless converter.convert_to_po( nil, nil) + error_count += 1 + puts 'There was an issue generating the POT. Will continue anyway.' + end + + + # Build a list of header files. Keep this list instead of counting + # on reading the working directory later. + header_path = File.join(pwd, 'src', 'language_*.h') + header_list = nil + Dir.chdir(File.join(pwd, 'src')) do + header_list = Dir.glob('language_*.h') + end + header_list.delete('language_en.h') + + + # Building the POs is straight forward. + header_list.each do |input_file| + filename = File.join(pwd, 'src', input_file) + converter = PoConverter.new + error_count = converter.convert_to_po( filename, nil, fuzzy_list ) ? error_count : error_count + 1 + end + + + # Building the Headers is straight forward, too. + header_list.each do |input_file| + filename = "#{File.basename(input_file, '.*')}.po" + converter = PoConverter.new + error_count = converter.convert_to_h( filename, nil ) ? error_count : error_count + 1 + end + + if error_count == 0 + puts 'rebase exited without errors.' + else + puts "rebase exited with errors #{error_count} time(s). Consider using the --verbose or --debug options." 
+ exit 1 + end + end # msgfmt + + ######################################################### # set_options # Handles command line options. diff --git a/localize/translations/tidy.pot b/localize/translations/tidy.pot index ecdef7f..6253850 100644 --- a/localize/translations/tidy.pot +++ b/localize/translations/tidy.pot @@ -5,10 +5,10 @@ msgstr "" "Plural-Forms: nplurals=2; plural=n != 1;\n" "X-Generator: HTML Tidy poconvert.rb\n" "Project-Id-Version: \n" -"POT-Creation-Date: 2016-03-24 10:59:07\n" +"POT-Creation-Date: 2016-03-24 11:36:17\n" "Last-Translator: jderry\n" "Language-Team: \n" -"BAD" + #. Only translate if a URL to the target language can be found. msgctxt "ACCESS_URL" msgid "http://www.w3.org/WAI/GL" diff --git a/src/language_en_gb.h b/src/language_en_gb.h index d84b7f9..a89f0fe 100644 --- a/src/language_en_gb.h +++ b/src/language_en_gb.h @@ -28,7 +28,7 @@ * * Orginating PO file metadata: * PO_LAST_TRANSLATOR=jderry - * PO_REVISION_DATE=2016-02-17 20:04:18 + * PO_REVISION_DATE=2016-03-24 10:59:55 */ #ifdef _MSC_VER @@ -65,19 +65,14 @@ static languageDefinition language_en_gb = { whichPluralForm_en_gb, { {/* Specify the ll or ll_cc language code here. */ TIDY_LANGUAGE, 0, "en_gb" }, - {/* This console output should be limited to 78 characters per line. */ - TEXT_USING_FONT, 0, + { TEXT_USING_FONT, 0, "You are recommended to use CSS to specify the font and\n" "properties such as its size and colour. This will reduce\n" "the size of HTML files and make them easier to maintain\n" "compared with using elements.\n\n" }, - {/* This console output should be limited to 78 characters per line. */ - TEXT_USING_BODY, 0, "You are recommended to use CSS to specify page and link colours\n" - }, - {/* This console output should be limited to 78 characters per line. - - The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. 
*/ - TEXT_GENERAL_INFO_PLEA, 0, + { TEXT_USING_BODY, 0, "You are recommended to use CSS to specify page and link colours\n" }, + { TEXT_GENERAL_INFO_PLEA, 0, "\n" "Would you like to see Tidy in proper, British English? Please consider \n" "helping us to localise HTML Tidy. For details please see \n" @@ -96,15 +91,7 @@ static languageDefinition language_en_gb = { whichPluralForm_en_gb, { { COLOR_CONTRAST_VISITED_LINK, 0, "[2.2.1.4]: poor colour contrast (visited link)." }, #endif /* SUPPORT_ACCESSIBILITY_CHECKS */ - {/* Important notes for translators: - - Use only , , , , and -
. - - Entities, tags, attributes, etc., should be enclosed in . - - Option values should be enclosed in . - - It's very important that
be self-closing! - - The strings "Tidy" and "HTML Tidy" are the program name and must not - be translated. */ - TidyMergeDivs, 0, + { TidyMergeDivs, 0, "This option can be used to modify the behaviour of clean when " "set to yes." "
" @@ -120,15 +107,7 @@ static languageDefinition language_en_gb = { whichPluralForm_en_gb, { "<div> are discarded with the exception of " "class and style. " }, - {/* Important notes for translators: - - Use only , , , , and -
. - - Entities, tags, attributes, etc., should be enclosed in . - - Option values should be enclosed in . - - It's very important that
be self-closing! - - The strings "Tidy" and "HTML Tidy" are the program name and must not - be translated. */ - TidyMergeSpans, 0, + { TidyMergeSpans, 0, "This option can be used to modify the behaviour of clean when " "set to yes." "
" @@ -137,15 +116,7 @@ static languageDefinition language_en_gb = { whichPluralForm_en_gb, { "
" "The algorithm is identical to the one used by merge-divs. " }, - {/* Important notes for translators: - - Use only , , , , and -
. - - Entities, tags, attributes, etc., should be enclosed in . - - Option values should be enclosed in . - - It's very important that
be self-closing! - - The strings "Tidy" and "HTML Tidy" are the program name and must not - be translated. */ - TidyReplaceColor, 0, + { TidyReplaceColor, 0, "This option specifies if Tidy should replace numeric values in colour " "attributes with HTML/XHTML colour names where defined, e.g. replace " "#ffffff with white. " diff --git a/src/language_es.h b/src/language_es.h index c0d8255..db09f2a 100644 --- a/src/language_es.h +++ b/src/language_es.h @@ -28,7 +28,7 @@ * * Orginating PO file metadata: * PO_LAST_TRANSLATOR=jderry - * PO_REVISION_DATE=2016-03-23 14:49:53 + * PO_REVISION_DATE=2016-03-24 10:59:55 */ #ifdef _MSC_VER diff --git a/src/language_es_mx.h b/src/language_es_mx.h index c794df4..5574b3b 100644 --- a/src/language_es_mx.h +++ b/src/language_es_mx.h @@ -28,7 +28,7 @@ * * Orginating PO file metadata: * PO_LAST_TRANSLATOR=jderry - * PO_REVISION_DATE=2016-02-17 20:04:18 + * PO_REVISION_DATE=2016-03-24 10:59:55 */ #ifdef _MSC_VER @@ -65,9 +65,7 @@ static languageDefinition language_es_mx = { whichPluralForm_es_mx, { {/* Specify the ll or ll_cc language code here. */ TIDY_LANGUAGE, 0, "es_mx" }, - {/* This console output should be limited to 78 characters per line. - - The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */ - TEXT_GENERAL_INFO_PLEA, 0, + { TEXT_GENERAL_INFO_PLEA, 0, "\n" "¿Le gustaría ver Tidy en adecuada, español mexicano? Por favor considere \n" "ayudarnos a localizar HTML Tidy. 
Para más detalles consulte \n" diff --git a/src/language_zh_cn.h b/src/language_zh_cn.h index 0266b34..1c35fef 100644 --- a/src/language_zh_cn.h +++ b/src/language_zh_cn.h @@ -28,7 +28,7 @@ * * Orginating PO file metadata: * PO_LAST_TRANSLATOR=jderry - * PO_REVISION_DATE=2016-02-17 20:04:18 + * PO_REVISION_DATE=2016-03-24 10:59:55 */ #ifdef _MSC_VER @@ -68,12 +68,8 @@ static languageDefinition language_zh_cn = { whichPluralForm_zh_cn, { { FILE_CANT_OPEN, 0, "无法打开”%s”\n" }, { LINE_COLUMN_STRING, 0, "行 %d 列 %d - " }, { STRING_CONTENT_LOOKS, 0, "文档内容看起来像 %s" }, - {/* The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */ - TC_STRING_VERS_A, 0, "HTML Tidy 用于 %s 版本 %s" - }, - {/* The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */ - TC_STRING_VERS_B, 0, "HTML Tidy 版本 %s" - }, + { TC_STRING_VERS_A, 0, "HTML Tidy 用于 %s 版本 %s" }, + { TC_STRING_VERS_B, 0, "HTML Tidy 版本 %s" }, {/* This MUST be present and last. */ TIDY_MESSAGE_TYPE_LAST, 0, NULL