Index: ps/trunk/source/tools/i18n/checkTranslations.py =================================================================== --- ps/trunk/source/tools/i18n/checkTranslations.py (revision 26338) +++ ps/trunk/source/tools/i18n/checkTranslations.py (revision 26339) @@ -1,123 +1,123 @@ #!/usr/bin/env python3 # -# Copyright (C) 2021 Wildfire Games. +# Copyright (C) 2022 Wildfire Games. # This file is part of 0 A.D. # # 0 A.D. is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # 0 A.D. is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with 0 A.D. If not, see . import sys, os, re, multiprocessing from i18n_helper import l10nFolderName, projectRootDirectory from i18n_helper.catalog import Catalog from i18n_helper.globber import getCatalogs VERBOSE = 0 class MessageChecker: """Checks all messages in a catalog against a regex.""" def __init__(self, human_name, regex): self.regex = re.compile(regex, re.IGNORECASE) self.human_name = human_name def check(self, inputFilePath, templateMessage, translatedCatalogs): patterns = set(self.regex.findall( templateMessage.id[0] if templateMessage.pluralizable else templateMessage.id )) # As a sanity check, verify that the template message is coherent. # Note that these tend to be false positives. # TODO: the pssible tags are usually comments, we ought be able to find them. if templateMessage.pluralizable: pluralUrls = set(self.regex.findall(templateMessage.id[1])) if pluralUrls.difference(patterns): print(f"{inputFilePath} - Different {self.human_name} in singular and plural source strings " f"for '{templateMessage}' in '{inputFilePath}'") for translationCatalog in translatedCatalogs: translationMessage = translationCatalog.get( templateMessage.id, templateMessage.context) if not translationMessage: continue translatedPatterns = set(self.regex.findall( translationMessage.string[0] if translationMessage.pluralizable else translationMessage.string )) unknown_patterns = translatedPatterns.difference(patterns) if unknown_patterns: print(f'{inputFilePath} - {translationCatalog.locale}: ' f'Found unknown {self.human_name} {", ".join(["`" + x + "`" for x in unknown_patterns])} in the translation ' f'which do not match any of the URLs in the template: {", ".join(["`" + x + "`" for x in patterns])}') if translationMessage.pluralizable: for indx, val in enumerate(translationMessage.string): if indx == 0: continue translatedPatternsMulti = set(self.regex.findall(val)) unknown_patterns_multi = translatedPatternsMulti.difference(pluralUrls) if unknown_patterns_multi: print(f'{inputFilePath} - {translationCatalog.locale}: ' f'Found unknown {self.human_name} {", ".join(["`" + x + "`" for x in unknown_patterns_multi])} in the pluralised translation ' f'which do not match any of the URLs in the template: {", ".join(["`" + x + "`" for x in pluralUrls])}') def check_translations(inputFilePath): if VERBOSE: print(f"Checking {inputFilePath}") templateCatalog = Catalog.readFrom(inputFilePath) # If language codes were specified on the command line, filter by those. filters = sys.argv[1:] # Load existing translation catalogs. existingTranslationCatalogs = getCatalogs(inputFilePath, filters) spam = MessageChecker("url", r"https?://(?:[a-z0-9-_$@./&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+") sprintf = MessageChecker("sprintf", r"%\([^)]+\)s") tags = MessageChecker("tag", r"[^\\][^\\](\[[^]]+/?\])") # Check that there are no spam URLs. # Loop through all messages in the .POT catalog for URLs. # For each, check for the corresponding key in the .PO catalogs. # If found, check that URLS in the .PO keys are the same as those in the .POT key. for templateMessage in templateCatalog: spam.check(inputFilePath, templateMessage, existingTranslationCatalogs) sprintf.check(inputFilePath, templateMessage, existingTranslationCatalogs) tags.check(inputFilePath, templateMessage, existingTranslationCatalogs) if VERBOSE: print(f"Done checking {inputFilePath}") def main(): print("\n\tWARNING: Remember to regenerate the POT files with “updateTemplates.py” " "before you run this script.\n\tPOT files are not in the repository.\n") foundPots = 0 for root, folders, filenames in os.walk(projectRootDirectory): for filename in filenames: if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == l10nFolderName: foundPots += 1 multiprocessing.Process( target=check_translations, args=(os.path.join(root, filename), ) ).start() if foundPots == 0: print( "This script did not work because no '.pot' files were found. " "Please run 'updateTemplates.py' to generate the '.pot' files, " "and run 'pullTranslations.py' to pull the latest translations from Transifex. " "Then you can run this script to check for spam in translations.") if __name__ == "__main__": main() Index: ps/trunk/source/tools/i18n/cleanTranslationFiles.py =================================================================== --- ps/trunk/source/tools/i18n/cleanTranslationFiles.py (revision 26338) +++ ps/trunk/source/tools/i18n/cleanTranslationFiles.py (revision 26339) @@ -1,67 +1,67 @@ #!/usr/bin/env python3 # -# Copyright (C) 2021 Wildfire Games. +# Copyright (C) 2022 Wildfire Games. # This file is part of 0 A.D. # # 0 A.D. is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # 0 A.D. is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with 0 A.D. If not, see . """ This file removes unneeded personal data from the translators. Most notably the e-mail addresses. We need to translators' nicks for the credits, but no more data is required. TODO: ideally we don't even pull the e-mail addresses in the .po files. However that needs to be fixed on the transifex side, see rP25896. For now strip the e-mails using this script. """ import sys, os, glob, re, fileinput from i18n_helper import l10nFolderName, transifexClientFolder, projectRootDirectory def main(): # Prepare some regexes. commentMatch = re.compile('#.*') translatorMatch = re.compile("(# [^,<]*)(?: <.*>)?(?:, [0-9,-]{4,9})") lastTranslatorMatch = re.compile("(\"Last-Translator: [^,<]*)(?: <.*>)?( ?\\\\n\")") for root, folders, filenames in os.walk(projectRootDirectory): for folder in folders: if folder == l10nFolderName: if os.path.exists(os.path.join(root, folder, transifexClientFolder)): path = os.path.join(root, folder, "*.po") files = glob.glob(path) for file in files: usernames = [] reached = False for line in fileinput.input(file.replace("\\", "/"), inplace=1): if reached: if line == "# \n": line = "" m = translatorMatch.match(line) if m: if m.group(1) in usernames: line = "" else: line = m.group(1) + "\n" usernames.append(m.group(1)) m2 = lastTranslatorMatch.match(line) if m2: line = m2.group(1) + "\\n\"\n" elif line.strip() == "# Translators:": reached = True sys.stdout.write(line) if __name__ == "__main__": main() Index: ps/trunk/source/tools/i18n/creditTranslators.py =================================================================== --- ps/trunk/source/tools/i18n/creditTranslators.py (revision 26338) +++ ps/trunk/source/tools/i18n/creditTranslators.py (revision 26339) @@ -1,172 +1,172 @@ #!/usr/bin/env python3 # -# Copyright (C) 2021 Wildfire Games. +# Copyright (C) 2022 Wildfire Games. # This file is part of 0 A.D. # # 0 A.D. is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # 0 A.D. is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with 0 A.D. If not, see . """ This file updates the translators credits located in the public mod GUI files, using translators names from the .po files. If translators change their names on Transifex, the script will remove the old names. TODO: It should be possible to add people in the list manually, and protect them against automatic deletion. This has not been needed so far. A possibility would be to add an optional boolean entry to the dictionary containing the name. Translatable strings will be extracted from the generated file, so this should be run once before updateTemplates.py. """ import json, os, glob, re from i18n_helper import l10nFolderName, transifexClientFolder, projectRootDirectory # We credit everyone that helps translating even if the translations don't # make it into the game. # Note: Needs to be edited manually when new languages are added on Transifex. langs = { 'af': 'Afrikaans', 'ar': 'الدارجة (Arabic)', 'ast': 'Asturianu', 'az': 'Azərbaycan dili', 'bar': 'Bairisch', 'be': 'Беларуская мова (Belarusian)', 'bg': 'Български (Bulgarian)', 'bn': 'বাংলা (Bengali)', 'br': 'Brezhoneg', 'ca': 'Català', 'cs': 'Čeština ', 'cy': 'Cymraeg', 'da': 'Dansk', 'de': 'Deutsch', 'el': 'Ελληνικά (Greek)', 'en_GB': 'English (United Kingdom)', 'eo': 'Esperanto', 'es': 'Español', 'es_AR': 'Español (Argentina)', 'es_CL': 'Español (Chile)', 'es_MX': 'Español (Mexico)', 'et': 'Eesti keel', 'eu': 'Euskara', 'fa': 'فارسی (Farsi)', 'fi': 'Suomi', 'fr': 'Français', 'fr_CA': 'Français (Canada)', 'frp': 'Franco-Provençal (Arpitan)', 'ga': 'Gaeilge', 'gd': 'Gàidhlig', 'gl': 'Galego', 'he': 'עברית (Hebrew)', 'hi': 'हिन्दी (Hindi)', 'hr': 'Croatian', 'hu': 'Magyar', 'hy': 'Հայերէն (Armenian)', 'id': 'Bahasa Indonesia', 'it': 'Italiano', 'ja': '日本語 (Japanese)', 'jbo': 'Lojban', 'ka': 'ქართული ენა (Georgian)', 'ko': '한국어 (Korean)', 'krl': 'Karjalan kieli', 'ku': 'کوردی (Kurdish)', 'la': 'Latin', 'lt': 'Lietuvių kalba', 'lv': 'Latviešu valoda', 'mk': 'македонски (Macedonian)', 'ml': 'മലയാളം (Malayalam)', 'mr': 'मराठी (Marathi)', 'ms': 'بهاس ملايو (Malay)', 'nb': 'Norsk Bokmål', 'nl': 'Nederlands', 'pl': 'Polski', 'pt_BR': 'Português (Brazil)', 'pt_PT': 'Português (Portugal)', 'ro': 'Românește', 'ru': 'Русский язык (Russian)', 'sk': 'Slovenčina', 'sl': 'Slovenščina', 'sq': 'Shqip', 'sr': 'Cрпски (Serbian)', 'sv': 'Svenska', 'szl': 'ślōnskŏ gŏdka', 'ta_IN': 'தமிழ் (India)', 'te': 'తెలుగు (Telugu)', 'th': 'ภาษาไทย (Thai)', 'tl': 'Tagalog', 'tr': 'Türkçe (Turkish)', 'uk': 'Українська (Ukrainian)', 'uz': 'Ўзбек тили (Uzbek)', 'vi': 'Tiếng Việt (Vietnamese)', 'zh': '中文, 汉语, 漢語 (Chinese)', 'zh_TW': '臺灣話 Chinese (Taiwan)'} poLocations = [] for root, folders, filenames in os.walk(projectRootDirectory): for folder in folders: if folder == l10nFolderName: if os.path.exists(os.path.join(root, folder, transifexClientFolder)): poLocations.append(os.path.join(root, folder)) creditsLocation = os.path.join(projectRootDirectory, 'binaries', 'data', 'mods', 'public', 'gui', 'credits', 'texts', 'translators.json') # This dictionnary will hold creditors lists for each language, indexed by code langsLists = {} # Create the new JSON data newJSONData = {'Title': 'Translators', 'Content': []} # Now go through the list of languages and search the .po files for people # Prepare some regexes translatorMatch = re.compile('# (.*)') deletedUsernameMatch = re.compile('[0-9a-f]{32}') # Search for lang in langs.keys(): if lang not in langsLists.keys(): langsLists[lang] = [] for location in poLocations: files = glob.glob(os.path.join(location, lang + '.*.po')) for file in files: poFile = open(file.replace('\\', '/'), encoding='utf-8') reached = False for line in poFile: if reached: m = translatorMatch.match(line) if not m: break username = m.group(1) if not deletedUsernameMatch.match(username): langsLists[lang].append(username) if line.strip() == '# Translators:': reached = True poFile.close() # Sort and remove duplicates # Sorting should ignore case to have a neat credits list langsLists[lang] = sorted(set(langsLists[lang]), key=lambda s: s.lower()) # Now insert the new data into the new JSON file for (langCode, langList) in sorted(langsLists.items()): newJSONData['Content'].append({'LangName': langs[langCode], 'List': []}) for name in langList: newJSONData['Content'][-1]['List'].append({'name': name}) # Save the JSON data to the credits file creditsFile = open(creditsLocation, 'w', encoding='utf-8') json.dump(newJSONData, creditsFile, indent=4) creditsFile.close() Index: ps/trunk/source/tools/i18n/generateDebugTranslation.py =================================================================== --- ps/trunk/source/tools/i18n/generateDebugTranslation.py (revision 26338) +++ ps/trunk/source/tools/i18n/generateDebugTranslation.py (revision 26339) @@ -1,166 +1,166 @@ #!/usr/bin/env python3 # -# Copyright (C) 2021 Wildfire Games. +# Copyright (C) 2022 Wildfire Games. # This file is part of 0 A.D. # # 0 A.D. is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # 0 A.D. is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with 0 A.D. If not, see . import argparse import os import sys import multiprocessing from i18n_helper import l10nFolderName, projectRootDirectory from i18n_helper.catalog import Catalog from i18n_helper.globber import getCatalogs DEBUG_PREFIX = 'X_X ' def generate_long_strings(root_path, input_file_name, output_file_name, languages=None): """ Generate the 'long strings' debug catalog. This catalog contains the longest singular and plural string, found amongst all translated languages or a filtered subset. It can be used to check if GUI elements are large enough. The catalog is long.*.po """ print("Generating", output_file_name) input_file_path = os.path.join(root_path, input_file_name) output_file_path = os.path.join(root_path, output_file_name) template_catalog = Catalog.readFrom(input_file_path) # Pretend we write English to get plurals. long_string_catalog = Catalog(locale="en") # Fill catalog with English strings. for message in template_catalog: long_string_catalog.add( id=message.id, string=message.id, context=message.context) # Load existing translation catalogs. existing_translation_catalogs = getCatalogs(input_file_path, languages) # If any existing translation has more characters than the average expansion, use that instead. for translation_catalog in existing_translation_catalogs: for long_string_catalog_message in long_string_catalog: translation_message = translation_catalog.get( long_string_catalog_message.id, long_string_catalog_message.context) if not translation_message or not translation_message.string: continue if not long_string_catalog_message.pluralizable or not translation_message.pluralizable: if len(translation_message.string) > len(long_string_catalog_message.string): long_string_catalog_message.string = translation_message.string continue longest_singular_string = translation_message.string[0] longest_plural_string = translation_message.string[1 if len( translation_message.string) > 1 else 0] candidate_singular_string = long_string_catalog_message.string[0] # There might be between 0 and infinite plural forms. candidate_plural_string = "" for candidate_string in long_string_catalog_message.string[1:]: if len(candidate_string) > len(candidate_plural_string): candidate_plural_string = candidate_string changed = False if len(candidate_singular_string) > len(longest_singular_string): longest_singular_string = candidate_singular_string changed = True if len(candidate_plural_string) > len(longest_plural_string): longest_plural_string = candidate_plural_string changed = True if changed: long_string_catalog_message.string = [ longest_singular_string, longest_plural_string] translation_message = long_string_catalog_message long_string_catalog.writeTo(output_file_path) def generate_debug(root_path, input_file_name, output_file_name): """ Generate a debug catalog to identify untranslated strings. This prefixes all strings with DEBUG_PREFIX, to easily identify untranslated strings while still making the game navigable. The catalog is debug.*.po """ print("Generating", output_file_name) input_file_path = os.path.join(root_path, input_file_name) output_file_path = os.path.join(root_path, output_file_name) template_catalog = Catalog.readFrom(input_file_path) # Pretend we write English to get plurals. out_catalog = Catalog(locale="en") for message in template_catalog: if message.pluralizable: out_catalog.add( id=message.id, string=(DEBUG_PREFIX + message.id[0],), context=message.context) else: out_catalog.add( id=message.id, string=DEBUG_PREFIX + message.id, context=message.context) out_catalog.writeTo(output_file_path) def main(): parser = argparse.ArgumentParser() parser.add_argument("--debug", help="Generate debug localisation to identify non-translated strings.", action="store_true") parser.add_argument("--long", help="Generate 'long strings' localisation to identify GUI elements too small.", action="store_true") parser.add_argument("--languages", nargs="+", help="For long strings, restrict to these languages") args = parser.parse_args() if not args.debug and not args.long: parser.print_help() sys.exit(0) found_pot_files = 0 for root, _, filenames in os.walk(projectRootDirectory): for filename in filenames: if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == l10nFolderName: found_pot_files += 1 if args.debug: multiprocessing.Process( target=generate_debug, args=(root, filename, "debug." + filename[:-1]) ).start() if args.long: multiprocessing.Process( target=generate_long_strings, args=(root, filename, "long." + filename[:-1], args.languages) ).start() if found_pot_files == 0: print("This script did not work because no ‘.pot’ files were found. " "Please, run ‘updateTemplates.py’ to generate the ‘.pot’ files, and run ‘pullTranslations.py’ to pull the latest translations from Transifex. " "Then you can run this script to generate ‘.po’ files with obvious debug strings.") if __name__ == "__main__": main() Index: ps/trunk/source/tools/i18n/pullTranslations.py =================================================================== --- ps/trunk/source/tools/i18n/pullTranslations.py (revision 26338) +++ ps/trunk/source/tools/i18n/pullTranslations.py (revision 26339) @@ -1,37 +1,37 @@ #!/usr/bin/env python3 # -# Copyright (C) 2021 Wildfire Games. +# Copyright (C) 2022 Wildfire Games. # This file is part of 0 A.D. # # 0 A.D. is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # 0 A.D. is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with 0 A.D. If not, see . import os, sys from txclib.project import Project from i18n_helper import l10nFolderName, transifexClientFolder, projectRootDirectory def main(): for root, folders, filenames in os.walk(projectRootDirectory): for folder in folders: if folder == l10nFolderName: if os.path.exists(os.path.join(root, folder, transifexClientFolder)): path = os.path.join(root, folder) os.chdir(path) project = Project(path) project.pull(fetchall=True, force=True, parallel=True) if __name__ == "__main__": main() Index: ps/trunk/source/tools/i18n/updateTemplates.py =================================================================== --- ps/trunk/source/tools/i18n/updateTemplates.py (revision 26338) +++ ps/trunk/source/tools/i18n/updateTemplates.py (revision 26339) @@ -1,124 +1,124 @@ #!/usr/bin/env python3 # -# Copyright (C) 2021 Wildfire Games. +# Copyright (C) 2022 Wildfire Games. # This file is part of 0 A.D. # # 0 A.D. is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # 0 A.D. is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with 0 A.D. If not, see . import json, os import multiprocessing from importlib import import_module from lxml import etree from i18n_helper import l10nFolderName, projectRootDirectory from i18n_helper.catalog import Catalog from extractors import extractors messagesFilename = "messages.json" def warnAboutUntouchedMods(): """ Warn about mods that are not properly configured to get their messages extracted. """ modsRootFolder = os.path.join(projectRootDirectory, "binaries", "data", "mods") untouchedMods = {} for modFolder in os.listdir(modsRootFolder): if modFolder[0] != "_" and modFolder[0] != '.': if not os.path.exists(os.path.join(modsRootFolder, modFolder, l10nFolderName)): untouchedMods[modFolder] = "There is no '{folderName}' folder in the root folder of this mod.".format(folderName=l10nFolderName) elif not os.path.exists(os.path.join(modsRootFolder, modFolder, l10nFolderName, messagesFilename)): untouchedMods[modFolder] = "There is no '{filename}' file within the '{folderName}' folder in the root folder of this mod.".format(folderName=l10nFolderName, filename=messagesFilename) if untouchedMods: print("" "Warning: No messages were extracted from the following mods:" "") for mod in untouchedMods: print("• {modName}: {warningMessage}".format(modName=mod, warningMessage=untouchedMods[mod])) print("" f"For this script to extract messages from a mod folder, this mod folder must contain a '{l10nFolderName}' " f"folder, and this folder must contain a '{messagesFilename}' file that describes how to extract messages for the " f"mod. See the folder of the main mod ('public') for an example, and see the documentation for more " f"information." ) def generatePOT(templateSettings, rootPath): if "skip" in templateSettings and templateSettings["skip"] == "yes": return inputRootPath = rootPath if "inputRoot" in templateSettings: inputRootPath = os.path.join(rootPath, templateSettings["inputRoot"]) template = Catalog( project=templateSettings["project"], copyright_holder=templateSettings["copyrightHolder"], locale='en', ) for rule in templateSettings["rules"]: if "skip" in rule and rule["skip"] == "yes": return options = rule.get("options", {}) extractorClass = getattr(import_module("extractors.extractors"), rule['extractor']) extractor = extractorClass(inputRootPath, rule["filemasks"], options) formatFlag = None if "format" in options: formatFlag = options["format"] for message, plural, context, location, comments in extractor.run(): message_id = (message, plural) if plural else message saved_message = template.get(message_id, context) or template.add( id=message_id, context=context, auto_comments=comments, flags=[formatFlag] if formatFlag and message.find("%") != -1 else [] ) saved_message.locations.append(location) saved_message.flags.discard('python-format') template.writeTo(os.path.join(rootPath, templateSettings["output"])) print(u"Generated \"{}\" with {} messages.".format(templateSettings["output"], len(template))) def generateTemplatesForMessagesFile(messagesFilePath): with open(messagesFilePath, 'r') as fileObject: settings = json.load(fileObject) for templateSettings in settings: multiprocessing.Process( target=generatePOT, args=(templateSettings, os.path.dirname(messagesFilePath)) ).start() def main(): import argparse parser = argparse.ArgumentParser() parser.add_argument("--scandir", help="Directory to start scanning for l10n folders in. " "Type '.' for current working directory") args = parser.parse_args() for root, folders, filenames in os.walk(args.scandir or projectRootDirectory): for folder in folders: if folder == l10nFolderName: messagesFilePath = os.path.join(root, folder, messagesFilename) if os.path.exists(messagesFilePath): generateTemplatesForMessagesFile(messagesFilePath) warnAboutUntouchedMods() if __name__ == "__main__": main()