summaryrefslogtreecommitdiff
path: root/blockly/i18n/xliff_to_json.py
diff options
context:
space:
mode:
Diffstat (limited to 'blockly/i18n/xliff_to_json.py')
-rwxr-xr-xblockly/i18n/xliff_to_json.py232
1 files changed, 232 insertions, 0 deletions
diff --git a/blockly/i18n/xliff_to_json.py b/blockly/i18n/xliff_to_json.py
new file mode 100755
index 0000000..b38b4d6
--- /dev/null
+++ b/blockly/i18n/xliff_to_json.py
@@ -0,0 +1,232 @@
+#!/usr/bin/python
+
+# Converts .xlf files into .json files for use at http://translatewiki.net.
+#
+# Copyright 2013 Google Inc.
+# https://developers.google.com/blockly/
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+from xml.dom import minidom
+from common import InputError
+from common import write_files
+
+# Global variables
+args = None # Parsed command-line arguments; assigned by main() and read by sort_units() when reporting errors.
+
+
def _parse_trans_unit(trans_unit):
    """Converts a trans-unit XML node into a more convenient dictionary format.

    Args:
        trans_unit: A DOM element representing a .xlf translation unit.

    Returns:
        A dictionary describing the translation unit.  It always has the
        entries 'key' (the unit's id attribute), 'source' and 'target'
        (the inner XML of the corresponding child element, or None when
        that element is absent).  Each <note from="X"> child adds an entry
        named X holding the note's text, e.g. 'description' and 'meaning'.

    Raises:
        InputError: The id attribute is missing or empty, a <source> or
            <target> element occurs more than once, or a <note> lacks a
            'from' attribute or does not have exactly one child node.
    """

    def get_value(tag_name):
        """Returns the inner XML of the unique tag_name element, or None."""
        elts = trans_unit.getElementsByTagName(tag_name)
        if not elts:
            return None
        if len(elts) > 1:
            # The key is not known here; the caller re-raises with it.
            raise InputError('', 'Unable to extract ' + tag_name)
        return ''.join(child.toxml() for child in elts[0].childNodes)

    key = trans_unit.getAttribute('id')
    if not key:
        raise InputError('', 'id attribute not found')
    result = {'key': key}

    # Get source and target, if present.
    try:
        result['source'] = get_value('source')
        result['target'] = get_value('target')
    except InputError as e:
        # Re-raise with the unit's key so the message identifies the unit.
        raise InputError(key, e.msg)

    # Get notes, using the from value as key and the data as value.
    for note in trans_unit.getElementsByTagName('note'):
        from_value = note.getAttribute('from')
        if from_value and len(note.childNodes) == 1:
            result[from_value] = note.childNodes[0].data
        else:
            raise InputError(key, 'Unable to extract ' + from_value)

    return result
+
+
def _process_file(filename):
    """Builds list of translation units from input file.

    Each translation unit in the input file includes:
    - an id (opaquely generated by Soy)
    - the Blockly name for the message
    - the text in the source language (generally English)
    - a description for the translator

    The Soy and Blockly ids are joined with a hyphen and serve as the
    keys in both output files.  The value is the corresponding text (in the
    <lang>.json file) or the description (in the qqq.json file).

    A unit whose description is 'ibid' (case-insensitive) repeats a meaning
    that was already described; such units are validated and then omitted
    from the result.

    Args:
        filename: The name of an .xlf file produced by Closure.

    Raises:
        InputError: The input file could not be parsed, lacked required
            fields, used 'ibid' before the meaning was described, or
            defined the same meaning twice.

    Returns:
        A list of dictionaries produced by _parse_trans_unit().

    Note:
        An IOError while reading the file is not propagated: a message is
        printed and the process exits with status 1.
    """
    try:
        try:
            parsed_xml = minidom.parse(filename)
        except IOError:
            # Don't get caught by the generic handler below.
            raise
        except Exception as e:
            print('')
            raise InputError(filename, str(e))

        results = []   # Units to return.
        names = set()  # Meanings that have already been described.

        # Make sure needed fields are present and non-empty.
        for trans_unit in parsed_xml.getElementsByTagName('trans-unit'):
            unit = _parse_trans_unit(trans_unit)
            for key in ('description', 'meaning', 'source'):
                if not unit.get(key):
                    raise InputError(filename + ':' + unit['key'],
                                     key + ' not found')
            meaning = unit['meaning']
            if unit['description'].lower() == 'ibid':
                if meaning not in names:
                    # If the term has not already been described, the use
                    # of 'ibid' is an error.
                    raise InputError(
                        filename,
                        'First encountered definition of: ' + meaning
                        + ' has definition: ' + unit['description']
                        + '. This error can occur if the definition was not'
                        + ' provided on the first appearance of the message'
                        + ' or if the source (English-language) messages'
                        + ' differ.')
                # The term was already described, so 'ibid' was used
                # correctly and nothing is output for this unit.
                continue
            if meaning in names:
                raise InputError(filename,
                                 'Second definition of: ' + meaning)
            names.add(meaning)
            results.append(unit)

        return results
    except IOError as e:
        print('Error with file {0}: {1}'.format(filename, e.strerror))
        sys.exit(1)
+
+
def sort_units(units, templates):
    """Sorts the translation units by their definition order in the template.

    Args:
        units: A list of dictionaries produced by _parse_trans_unit()
            that have a non-empty value for the key 'meaning'.
        templates: A string containing the Soy templates in which each of
            the units' meanings is defined.

    Returns:
        A new list of translation units, sorted by the position at which
        their meaning is first defined in the templates.

    Raises:
        InputError: If a meaning definition cannot be found in the
            templates.
    """
    def key_function(unit):
        # Escape the meaning so that regex metacharacters in it (such as
        # the '.' in a dotted name) only match themselves; otherwise a
        # different, earlier definition could be matched by accident.
        pattern = r'\smeaning\s*=\s*"{0}"\s'.format(
            re.escape(unit['meaning']))
        match = re.search(pattern, templates)
        if match is None:
            raise InputError(args.templates,
                             'msg definition for meaning not found: ' +
                             unit['meaning'])
        return match.start()

    return sorted(units, key=key_function)
+
+
def main():
    """Parses arguments and processes the specified file.

    Reads the .xlf file named by --xlf, orders its translation units by
    where their meanings appear in the Soy templates, writes the output
    files via write_files(), and finally deletes the input .xlf file.

    Raises:
        IOError: An I/O error occurred with an input or output file.
        InputError: Input files lacked required fields.
    """
    global args

    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <ellen.spertus@gmail.com>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='json',
                        help='relative directory for output files')
    # The input file has no sensible default, so let argparse report a
    # clear usage error instead of failing later while parsing None.
    parser.add_argument('--xlf', required=True,
                        help='file containing xlf definitions')
    parser.add_argument('--templates', default=['template.soy'], nargs='+',
                        help='relative path to Soy templates, comma or space '
                        'separated (used for ordering messages)')
    args = parser.parse_args()

    # Make sure output_dir ends with slash.
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Process the input file, and sort the entries.
    units = _process_file(args.xlf)
    files = []
    for arg in args.templates:
        # Each argument may itself be a comma-separated list of paths.
        for filename in arg.split(','):
            filename = filename.strip()
            if filename:
                with open(filename) as myfile:
                    # Flatten each template onto one line so the search in
                    # sort_units() is not affected by line breaks.
                    files.append(' '.join(line.strip() for line in myfile))
    sorted_units = sort_units(units, ' '.join(files))

    # Write the output files.
    write_files(args.author, args.lang, args.output_dir, sorted_units, True)

    # Delete the input .xlf file.
    os.remove(args.xlf)
    print('Removed ' + args.xlf)


if __name__ == '__main__':
    main()