Index: source/tools/i18n/checkTranslationsForSpam.py
===================================================================
--- source/tools/i18n/checkTranslationsForSpam.py
+++ source/tools/i18n/checkTranslationsForSpam.py
@@ -1,7 +1,6 @@
-#!/usr/bin/env python2
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python3
 #
-# Copyright (C) 2014 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
 # This file is part of 0 A.D.
 #
 # 0 A.D. is free software: you can redistribute it and/or modify
@@ -17,84 +16,88 @@
 # You should have received a copy of the GNU General Public License
 # along with 0 A.D. If not, see .
 
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import codecs, os, re, sys
 
-from pology.catalog import Catalog
-from pology.message import Message
+from babel.messages.catalog import Catalog
+from babel.messages.pofile import read_po
+import multiprocessing
 
 
 l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
 projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
 l10nFolderName = "l10n"
 
-
 def checkTranslationsForSpam(inputFilePath):
-
-    print(u"Checking", inputFilePath)
-    templateCatalog = Catalog(inputFilePath)
-
+    print(f"Checking {inputFilePath}")
+    with open(inputFilePath, "r", encoding="utf-8") as templateFile:
+        templateCatalog = read_po(templateFile)
     # If language codes were specified on the command line, filter by those.
     filters = sys.argv[1:]
 
     # Load existing translation catalogs.
     existingTranslationCatalogs = []
     l10nFolderPath = os.path.dirname(inputFilePath)
+    inputFileName = os.path.basename(inputFilePath)
 
-    # .pot is one letter longer than .po, but the dot that separates the locale
-    # code from the rest of the filename in .po files makes up for that.
-    charactersToSkip = len(os.path.basename(inputFilePath))
+    for filename in os.listdir(str(l10nFolderPath)):
+        if filename.startswith("long") or not filename.endswith(".po"):
+            continue
+        if filename.split(".")[1] != inputFileName.split(".")[0]:
+            continue
+        if not filters or filename.split(".")[0] in filters:
+            with open(os.path.join(l10nFolderPath, filename), "r", encoding="utf-8") as translationFile:
+                existingTranslationCatalogs.append(read_po(translationFile, locale=filename.split(".")[0]))
 
-    for filename in os.listdir(l10nFolderPath):
-        if len(filename) > 3 and filename[-3:] == ".po" and filename[:4] != "long":
-            if not filters or filename[:-charactersToSkip] in filters:
-                if os.path.basename(inputFilePath)[:-4] == filename.split('.')[-2]:
-                    existingTranslationCatalogs.append([filename[:-charactersToSkip], os.path.join(l10nFolderPath, filename)])
+    urlPattern = re.compile(r"https?:\/\/(?:[a-zA-Z]|[0-9]|[-_$@./&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
 
-    urlPattern = re.compile(u"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
+    # Check that there are no spam URLs.
+    # Loop through all messages in the .POT catalog for URLs.
+    # For each, check for the corresponding key in the .PO catalogs.
+    # If found, check that URLS in the .PO keys are the same as those in the .POT key.
+    for templateMessage in templateCatalog:
+        templateUrls = set(urlPattern.findall(
+            templateMessage.id[0] if templateMessage.pluralizable else templateMessage.id
+        ))
+        # As a sanity check, verify that the template message is coherent
+        if templateMessage.pluralizable:
+            pluralUrls = set(urlPattern.findall(templateMessage.id[1]))
+            if pluralUrls.difference(templateUrls):
+                print(f"{inputFilePath} - Different URLs in singular and plural source strings"
+                      f" for '{templateMessage}' in '{inputFilePath}'")
 
-    # Check the URLs in translations against the URLs in the translation template.
-    for languageCode, pofile in existingTranslationCatalogs:
-        translationCatalog = Catalog(pofile)
-        for templateMessage in templateCatalog:
-            translationMessage = translationCatalog.select_by_key(templateMessage.msgctxt, templateMessage.msgid)
-            if translationMessage:
-                templateSingularString = templateMessage.msgid
-                templateUrls = urlPattern.findall(templateMessage.msgid)
-                # Assert that the same URL is used in both the plural and singular forms.
-                if templateMessage.msgid_plural and len(templateMessage.msgstr) > 1:
-                    pluralUrls = urlPattern.findall(templateMessage.msgstr[0])
-                    for url in pluralUrls:
-                        if url not in templateUrls:
-                            print(u"Different URLs in singular and plural source strings for ‘{}’ in ‘{}’".format(
-                                templateMessage.msgid,
-                                inputFilePath))
-                for translationString in translationMessage[0].msgstr:
-                    translationUrls = urlPattern.findall(translationString)
-                    for translationUrl in translationUrls:
-                        if translationUrl not in templateUrls:
-                            print(u"{}: Found the “{}” URL in the translation, which does not match any of the URLs in the translation template: {}".format(
-                                languageCode,
-                                translationUrl,
-                                u", ".join(templateUrls)))
+        for translationCatalog in existingTranslationCatalogs:
+            translationMessage = translationCatalog.get(templateMessage.id, templateMessage.context)
+            if not translationMessage:
+                continue
+            translationUrls = set(urlPattern.findall(
+                " ".join(translationMessage.string) if translationMessage.pluralizable else translationMessage.string
+            ))
+            unknown_urls = translationUrls.difference(templateUrls)
+            if unknown_urls:
+                print(f'{inputFilePath} - {translationCatalog.locale}: '
+                      f'Found unknown URL(s) {", ".join(unknown_urls)} in the translation '
+                      f'which do not match any of the URLs in the template: {", ".join(templateUrls)}')
+    print(f"Done checking {inputFilePath}")
 
 
 def main():
-
-    print(u"\n    WARNING: Remember to regenerate the POT files with “updateTemplates.py” before you run this script.\n    POT files are not in the repository.\n")
-
+    print("\n\tWARNING: Remember to regenerate the POT files with “updateTemplates.py” "
+          "before you run this script.\n\tPOT files are not in the repository.\n")
     foundPots = 0
     for root, folders, filenames in os.walk(projectRootDirectory):
-        root = root.decode("utf-8")
         for filename in filenames:
             if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == "l10n":
                 foundPots += 1
-                checkTranslationsForSpam(os.path.join(root, filename))
+                multiprocessing.Process(
+                    target=checkTranslationsForSpam,
+                    args=(os.path.join(root, filename), )
+                ).start()
     if foundPots == 0:
-        print(u"This script did not work because no ‘.pot’ files were found.")
-        print(u"Please, run ‘updateTemplates.py’ to generate the ‘.pot’ files, and run ‘pullTranslations.py’ to pull the latest translations from Transifex.")
-        print(u"Then you can run this script to generate ‘.po’ files with the longest strings.")
+        print(
+            "This script did not work because no '.pot' files were found. "
+            "Please run 'updateTemplates.py' to generate the '.pot' files, "
+            "and run 'pullTranslations.py' to pull the latest translations from Transifex. "
+            "Then you can run this script to check for spam in translations.")
 
 
 if __name__ == "__main__":
Index: source/tools/i18n/creditTranslators.py
===================================================================
--- source/tools/i18n/creditTranslators.py
+++ source/tools/i18n/creditTranslators.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
-# -*- coding:utf-8 -*-
 #
-# Copyright (C) 2019 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
 # This file is part of 0 A.D.
 #
 # 0 A.D. is free software: you can redistribute it and/or modify
Index: source/tools/i18n/extractors/extractors.py
===================================================================
--- source/tools/i18n/extractors/extractors.py
+++ source/tools/i18n/extractors/extractors.py
@@ -1,5 +1,3 @@
-# -*- coding:utf-8 -*-
-#
 # Copyright (C) 2016 Wildfire Games.
 # All rights reserved.
 #
@@ -20,8 +18,6 @@
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import codecs, re, os, sys
 import json as jsonParser
 
@@ -39,7 +35,7 @@
     """
     s = re.split(r"([*][*]?)", mask)
     p = ""
-    for i in xrange(len(s)):
+    for i in range(len(s)):
         if i % 2 != 0:
             p = p + "[^/]+"
             if len(s[i]) == 2:
@@ -327,7 +323,7 @@
     def extractFromFile(self, filepath):
         with codecs.open(filepath, "r", 'utf-8') as fileObject:
             for message, breadcrumbs in self.extractFromString(fileObject.read()):
-                yield message, None, self.context, self.formatBreadcrumbs(breadcrumbs), -1, self.comments
+                yield message, None, self.context, self.formatBreadcrumbs(breadcrumbs), None, self.comments
 
     def extractFromString(self, string):
         self.breadcrumbs = []
@@ -360,7 +356,7 @@
         for keyword in dictionary:
             self.breadcrumbs.append(keyword)
             if keyword in self.keywords:
-                if isinstance(dictionary[keyword], unicode):
+                if isinstance(dictionary[keyword], str):
                     yield dictionary[keyword], self.breadcrumbs
                 elif isinstance(dictionary[keyword], list):
                     for message, breadcrumbs in self.extractList(dictionary[keyword]):
@@ -380,7 +376,7 @@
         index = 0
         for listItem in itemsList:
             self.breadcrumbs.append(index)
-            if isinstance(listItem, unicode):
+            if isinstance(listItem, str):
                 yield listItem, self.breadcrumbs
             del self.breadcrumbs[-1]
             index += 1
@@ -388,7 +384,7 @@
     def extractDictionary(self, dictionary):
         for keyword in dictionary:
             self.breadcrumbs.append(keyword)
-            if isinstance(dictionary[keyword], unicode):
+            if isinstance(dictionary[keyword], str):
                 yield dictionary[keyword], self.breadcrumbs
             del self.breadcrumbs[-1]
 
@@ -429,7 +425,7 @@
                     attributes = [element.get(attribute) for attribute in self.keywords[keyword]["locationAttributes"] if attribute in element.attrib]
                     breadcrumb = "({attributes})".format(attributes=", ".join(attributes))
                 if "context" in element.attrib:
-                    context = unicode(element.get("context"))
+                    context = str(element.get("context"))
                 elif "tagAsContext" in self.keywords[keyword]:
                     context = keyword
                 elif "customContext" in self.keywords[keyword]:
@@ -442,9 +438,9 @@
                     for splitText in element.text.split():
                         # split on whitespace is used for token lists, there, a leading '-' means the token has to be removed, so it's not to be processed here either
                         if splitText[0] != "-":
-                            yield unicode(splitText), None, context, breadcrumb, position, comments
+                            yield str(splitText), None, context, breadcrumb, position, comments
                 else:
-                    yield unicode(element.text), None, context, breadcrumb, position, comments
+                    yield str(element.text), None, context, breadcrumb, position, comments
 
 
 # Hack from http://stackoverflow.com/a/2819788
Index: source/tools/i18n/extractors/jslexer.py
===================================================================
--- source/tools/i18n/extractors/jslexer.py
+++ source/tools/i18n/extractors/jslexer.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
 # Copyright (C) 2008-2011 Edgewall Software
 # Copyright (C) 2013-2014 Wildfire Games.
 # All rights reserved.
@@ -31,8 +29,6 @@
 extractor.
 """
 
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 from operator import itemgetter
 import re
 
@@ -128,7 +124,7 @@
             escaped_value = escaped.group()
             if len(escaped_value) == 4:
                 try:
-                    add(unichr(int(escaped_value, 16)))
+                    add(chr(int(escaped_value, 16)))
                 except ValueError:
                     pass
                 else:
Index: source/tools/i18n/pullTranslations.py
===================================================================
--- source/tools/i18n/pullTranslations.py
+++ source/tools/i18n/pullTranslations.py
@@ -1,7 +1,6 @@
-#!/usr/bin/env python2
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python3
 #
-# Copyright (C) 2014 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
 # This file is part of 0 A.D.
 #
 # 0 A.D. is free software: you can redistribute it and/or modify
@@ -17,29 +16,12 @@
 # You should have received a copy of the GNU General Public License
 # along with 0 A.D. If not, see .
 
-"""
-    Although this script itself should work with both Python 2 and Python 3, it relies on the Transifex Client, which at
-    this moment (2014-10-23) does not support Python 3 in the latest stable release (0.10).
-
-    As soon as Transifex Client supports Python 3, simply updating its folder should be enough to make this script work
-    with Python 3 as well.
-"""
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-
 import os, sys
 
-# Python version check.
-if sys.version_info[0] != 2:
-    print(__doc__)
-    sys.exit()
-
 from txclib.project import Project
 
 
 def main():
-
-
     l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
     projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
     l10nFolderName = "l10n"
Index: source/tools/i18n/requirements.txt
===================================================================
--- /dev/null
+++ source/tools/i18n/requirements.txt
@@ -0,0 +1,2 @@
+babel~=2.6
+lxml~=4.5
Index: source/tools/i18n/updateTemplates.py
===================================================================
--- source/tools/i18n/updateTemplates.py
+++ source/tools/i18n/updateTemplates.py
@@ -1,5 +1,4 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 #
 # Copyright (C) 2018 Wildfire Games.
 # This file is part of 0 A.D.
@@ -21,18 +20,38 @@
 
 import codecs, datetime, json, os, string, textwrap
 
-from pology.catalog import Catalog
-from pology.message import Message
-from pology.monitored import Monpair, Monlist
+from babel.messages.catalog import Catalog as BabelCatalog
+from babel.messages.pofile import write_po
 from lxml import etree
+import multiprocessing
 
 
 l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
 projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
 l10nFolderName = "l10n"
 messagesFilename = "messages.json"
 
 
+class Catalog(BabelCatalog):
+    """Babel catalog whose MIME headers are Project-Id-Version plus a fixed subset of the default fields."""
+    def __init__(self, *args, project, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._project = project
+
+    @BabelCatalog.mime_headers.getter
+    def mime_headers(self):
+        headers = []
+        for name, value in super().mime_headers:
+            if name in {
+                "PO-Revision-Date",
+                "POT-Creation-Date",
+                "MIME-Version",
+                "Content-Type",
+                "Content-Transfer-Encoding",
+                "Plural-Forms"}:
+                headers.append((name, value))
+
+        return [('Project-Id-Version', self._project)] + headers
 
 def warnAboutUntouchedMods():
     """
@@ -41,7 +60,7 @@
     modsRootFolder = os.path.join(projectRootDirectory, "binaries", "data", "mods")
     untouchedMods = {}
     for modFolder in os.listdir(modsRootFolder):
-        if modFolder[0] != "_":
+        if modFolder[0] != "_" and modFolder[0] != '.':
             if not os.path.exists(os.path.join(modsRootFolder, modFolder, l10nFolderName)):
                 untouchedMods[modFolder] = "There is no '{folderName}' folder in the root folder of this mod.".format(folderName=l10nFolderName)
             elif not os.path.exists(os.path.join(modsRootFolder, modFolder, l10nFolderName, messagesFilename)):
@@ -60,64 +79,76 @@
             """.format(folderName=l10nFolderName, filename=messagesFilename)
         ))
 
+def generatePOT(templateSettings, rootPath):
+    if "skip" in templateSettings and templateSettings["skip"] == "yes":
+        return
+
+    print(f'Generating {templateSettings["project"]}')
+
+    inputRootPath = rootPath
+    if "inputRoot" in templateSettings:
+        inputRootPath = os.path.join(rootPath, templateSettings["inputRoot"])
+
+    template = Catalog(
+        header_comment=(
+f"""
+# Translation template for {templateSettings["project"]}.
+# Copyright (C) {datetime.datetime.now().year} {templateSettings["copyrightHolder"]}
+# This file is distributed under the same license as the {templateSettings["project"]} project.
+"""),
+        charset="utf-8",
+        fuzzy=False,
+        creation_date=datetime.datetime.now(),
+        revision_date=datetime.datetime.now(),
+        locale='en',
+        project=templateSettings["project"]
+    )
+
+    for rule in templateSettings["rules"]:
+        if "skip" in rule and rule["skip"] == "yes":
+            continue
+
+        options = rule.get("options", {})
+        extractorClass = getattr(__import__("extractors.extractors", {}, {}, [rule["extractor"]]), rule["extractor"])
+        extractor = extractorClass(inputRootPath, rule["filemasks"], options)
+        formatFlag = None
+        if "format" in options:
+            formatFlag = options["format"]
+        for message, plural, context, location, comments in extractor.run():
+            message_id = (message, plural) if plural else message
+
+            saved_message = template.get(message_id, context) or template.add(
+                id=message_id,
+                context=context,
+                auto_comments=comments,
+                flags=[formatFlag] if formatFlag and message.find("%") != -1 else []
+            )
+            saved_message.locations.append(location)
+            saved_message.flags.discard('python-format')
+
+    with open(os.path.join(rootPath, templateSettings["output"]), "wb+") as potFile:
+        write_po(fileobj=potFile, catalog=template, sort_by_file=True)
+    print(u"Generated \"{}\" with {} messages.".format(templateSettings["output"], len(template)))
 
 def generateTemplatesForMessagesFile(messagesFilePath):
 
     with open(messagesFilePath, 'r') as fileObject:
         settings = json.load(fileObject)
 
-    rootPath = os.path.dirname(messagesFilePath)
-
     for templateSettings in settings:
-        if "skip" in templateSettings and templateSettings["skip"] == "yes":
-            continue
-
-        inputRootPath = rootPath
-        if "inputRoot" in templateSettings:
-            inputRootPath = os.path.join(rootPath, templateSettings["inputRoot"])
-
-        template = Catalog(os.path.join(rootPath, templateSettings["output"]), create=True, truncate=True)
-        h = template.update_header(
-            templateSettings["project"],
-            "Translation template for %project.",
-            "Copyright (C) {year} {holder}".format(
-                year=datetime.datetime.now().year,
-                holder=templateSettings["copyrightHolder"]
-            ),
-            "This file is distributed under the same license as the %project project.",
-            plforms="nplurals=2; plural=(n != 1);"
-        )
-        h.remove_field("Report-Msgid-Bugs-To")
-        h.remove_field("Last-Translator")
-        h.remove_field("Language-Team")
-        h.remove_field("Language")
-        h.author = Monlist()
-
-        for rule in templateSettings["rules"]:
-            if "skip" in rule and rule["skip"] == "yes":
-                continue
-
-            options = rule.get("options", {})
-            extractorClass = getattr(__import__("extractors.extractors", {}, {}, [rule["extractor"]]), rule["extractor"])
-            extractor = extractorClass(inputRootPath, rule["filemasks"], options)
-            formatFlag = None
-            if "format" in options:
-                formatFlag = options["format"]
-            for message, plural, context, location, comments in extractor.run():
-                msg = Message({"msgid": message, "msgid_plural": plural, "msgctxt": context, "auto_comment": comments, "flag": [formatFlag] if formatFlag and string.find(message, "%") != -1 else None, "source": [location]})
-                if template.get(msg):
-                    template.get(msg).source.append(Monpair(location))
-                else:
-                    template.add(msg)
-
-        template.set_encoding("utf-8")
-        template.sync(fitplural=True)
-        print(u"Generated \"{}\" with {} messages.".format(templateSettings["output"], len(template)))
+        multiprocessing.Process(
+            target=generatePOT,
+            args=(templateSettings, os.path.dirname(messagesFilePath))
+        ).start()
 
 
 def main():
-
-    for root, folders, filenames in os.walk(projectRootDirectory):
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--scandir", help="Directory to start scanning for l10n folders in. "
+                                          "Type '.' for current working directory")
+    args = parser.parse_args()
+    for root, folders, filenames in os.walk(args.scandir or projectRootDirectory):
         for folder in folders:
             if folder == l10nFolderName:
                 messagesFilePath = os.path.join(root, folder, messagesFilename)