Index: source/tools/i18n/checkTranslationsForSpam.py
===================================================================
--- source/tools/i18n/checkTranslationsForSpam.py
+++ source/tools/i18n/checkTranslationsForSpam.py
@@ -1,7 +1,6 @@
-#!/usr/bin/env python2
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python3
#
-# Copyright (C) 2014 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
# This file is part of 0 A.D.
#
# 0 A.D. is free software: you can redistribute it and/or modify
@@ -17,84 +16,88 @@
# You should have received a copy of the GNU General Public License
# along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
-from __future__ import absolute_import, division, print_function, unicode_literals
-
import codecs, os, re, sys
-from pology.catalog import Catalog
-from pology.message import Message
+from babel.messages.catalog import Catalog
+from babel.messages.pofile import read_po
+import multiprocessing
l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
l10nFolderName = "l10n"
-
def checkTranslationsForSpam(inputFilePath):
-
- print(u"Checking", inputFilePath)
- templateCatalog = Catalog(inputFilePath)
-
+ print(f"Checking {inputFilePath}")
+ templateCatalog = read_po(open(inputFilePath, "r+"))
# If language codes were specified on the command line, filter by those.
filters = sys.argv[1:]
# Load existing translation catalogs.
existingTranslationCatalogs = []
l10nFolderPath = os.path.dirname(inputFilePath)
+ inputFileName = os.path.basename(inputFilePath)
- # .pot is one letter longer than .po, but the dot that separates the locale
- # code from the rest of the filename in .po files makes up for that.
- charactersToSkip = len(os.path.basename(inputFilePath))
+ for filename in os.listdir(str(l10nFolderPath)):
+ if filename.startswith("long") or not filename.endswith(".po"):
+ continue
+ if filename.split(".")[1] != inputFileName.split(".")[0]:
+ continue
+ if not filters or filename.split(".")[0] in filters:
+ existingTranslationCatalogs.append(
+ read_po(open(os.path.join(l10nFolderPath, filename), 'r+'), locale=filename.split('.')[0])
+ )
- for filename in os.listdir(l10nFolderPath):
- if len(filename) > 3 and filename[-3:] == ".po" and filename[:4] != "long":
- if not filters or filename[:-charactersToSkip] in filters:
- if os.path.basename(inputFilePath)[:-4] == filename.split('.')[-2]:
- existingTranslationCatalogs.append([filename[:-charactersToSkip], os.path.join(l10nFolderPath, filename)])
+ urlPattern = re.compile(r"https?:\/\/(?:[a-zA-Z]|[0-9]|[-_$@./&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
- urlPattern = re.compile(u"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
+ # Check that there are no spam URLs.
+ # Loop through all messages in the .POT catalog for URLs.
+ # For each, check for the corresponding key in the .PO catalogs.
+ # If found, check that URLs in the .PO keys are the same as those in the .POT key.
+ for templateMessage in templateCatalog:
+ templateUrls = set(urlPattern.findall(
+ templateMessage.id[0] if templateMessage.pluralizable else templateMessage.id
+ ))
+ # As a sanity check, verify that the template message is coherent
+ if templateMessage.pluralizable:
+ pluralUrls = set(urlPattern.findall(templateMessage.id[1]))
+ if pluralUrls.difference(templateUrls):
+ print(f"{inputFilePath} - Different URLs in singular and plural source strings "
+ f"for '{templateMessage}' in '{inputFilePath}'")
- # Check the URLs in translations against the URLs in the translation template.
- for languageCode, pofile in existingTranslationCatalogs:
- translationCatalog = Catalog(pofile)
- for templateMessage in templateCatalog:
- translationMessage = translationCatalog.select_by_key(templateMessage.msgctxt, templateMessage.msgid)
- if translationMessage:
- templateSingularString = templateMessage.msgid
- templateUrls = urlPattern.findall(templateMessage.msgid)
- # Assert that the same URL is used in both the plural and singular forms.
- if templateMessage.msgid_plural and len(templateMessage.msgstr) > 1:
- pluralUrls = urlPattern.findall(templateMessage.msgstr[0])
- for url in pluralUrls:
- if url not in templateUrls:
- print(u"Different URLs in singular and plural source strings for ‘{}’ in ‘{}’".format(
- templateMessage.msgid,
- inputFilePath))
- for translationString in translationMessage[0].msgstr:
- translationUrls = urlPattern.findall(translationString)
- for translationUrl in translationUrls:
- if translationUrl not in templateUrls:
- print(u"{}: Found the “{}” URL in the translation, which does not match any of the URLs in the translation template: {}".format(
- languageCode,
- translationUrl,
- u", ".join(templateUrls)))
+ for translationCatalog in existingTranslationCatalogs:
+ translationMessage = translationCatalog.get(templateMessage.id, templateMessage.context)
+ if not translationMessage:
+ continue
+ translationUrls = set(urlPattern.findall(
+ "\n".join(translationMessage.string) if translationMessage.pluralizable else translationMessage.string
+ ))
+ unknown_urls = translationUrls.difference(templateUrls)
+ if unknown_urls:
+ print(f'{inputFilePath} - {translationCatalog.locale}: '
+ f'Found unknown URL(s) {", ".join(unknown_urls)} in the translation '
+ f'which do not match any of the URLs in the template: {", ".join(templateUrls)}')
+ print(f"Done checking {inputFilePath}")
def main():
-
- print(u"\n WARNING: Remember to regenerate the POT files with “updateTemplates.py” before you run this script.\n POT files are not in the repository.\n")
-
+ print("\n\tWARNING: Remember to regenerate the POT files with “updateTemplates.py” "
+ "before you run this script.\n\tPOT files are not in the repository.\n")
foundPots = 0
for root, folders, filenames in os.walk(projectRootDirectory):
- root = root.decode("utf-8")
for filename in filenames:
if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == "l10n":
foundPots += 1
- checkTranslationsForSpam(os.path.join(root, filename))
+ multiprocessing.Process(
+ target=checkTranslationsForSpam,
+ args=(os.path.join(root, filename), )
+ ).start()
if foundPots == 0:
- print(u"This script did not work because no ‘.pot’ files were found.")
- print(u"Please, run ‘updateTemplates.py’ to generate the ‘.pot’ files, and run ‘pullTranslations.py’ to pull the latest translations from Transifex.")
- print(u"Then you can run this script to generate ‘.po’ files with the longest strings.")
+ print(
+ "This script did not work because no '.pot' files were found. "
+ "Please run 'updateTemplates.py' to generate the '.pot' files, "
+ "and run 'pullTranslations.py' to pull the latest translations from Transifex. "
+ "Then you can run this script to check for spam in translations.")
if __name__ == "__main__":
Index: source/tools/i18n/creditTranslators.py
===================================================================
--- source/tools/i18n/creditTranslators.py
+++ source/tools/i18n/creditTranslators.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
#
-# Copyright (C) 2019 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
# This file is part of 0 A.D.
#
# 0 A.D. is free software: you can redistribute it and/or modify
Index: source/tools/i18n/extractors/extractors.py
===================================================================
--- source/tools/i18n/extractors/extractors.py
+++ source/tools/i18n/extractors/extractors.py
@@ -1,5 +1,3 @@
-# -*- coding:utf-8 -*-
-#
# Copyright (C) 2016 Wildfire Games.
# All rights reserved.
#
@@ -20,8 +18,6 @@
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from __future__ import absolute_import, division, print_function, unicode_literals
-
import codecs, re, os, sys
import json as jsonParser
@@ -39,7 +35,7 @@
"""
s = re.split(r"([*][*]?)", mask)
p = ""
- for i in xrange(len(s)):
+ for i in range(len(s)):
if i % 2 != 0:
p = p + "[^/]+"
if len(s[i]) == 2:
@@ -327,7 +323,7 @@
def extractFromFile(self, filepath):
with codecs.open(filepath, "r", 'utf-8') as fileObject:
for message, breadcrumbs in self.extractFromString(fileObject.read()):
- yield message, None, self.context, self.formatBreadcrumbs(breadcrumbs), -1, self.comments
+ yield message, None, self.context, self.formatBreadcrumbs(breadcrumbs), None, self.comments
def extractFromString(self, string):
self.breadcrumbs = []
@@ -360,7 +356,7 @@
for keyword in dictionary:
self.breadcrumbs.append(keyword)
if keyword in self.keywords:
- if isinstance(dictionary[keyword], unicode):
+ if isinstance(dictionary[keyword], str):
yield dictionary[keyword], self.breadcrumbs
elif isinstance(dictionary[keyword], list):
for message, breadcrumbs in self.extractList(dictionary[keyword]):
@@ -380,7 +376,7 @@
index = 0
for listItem in itemsList:
self.breadcrumbs.append(index)
- if isinstance(listItem, unicode):
+ if isinstance(listItem, str):
yield listItem, self.breadcrumbs
del self.breadcrumbs[-1]
index += 1
@@ -388,7 +384,7 @@
def extractDictionary(self, dictionary):
for keyword in dictionary:
self.breadcrumbs.append(keyword)
- if isinstance(dictionary[keyword], unicode):
+ if isinstance(dictionary[keyword], str):
yield dictionary[keyword], self.breadcrumbs
del self.breadcrumbs[-1]
@@ -429,7 +425,7 @@
attributes = [element.get(attribute) for attribute in self.keywords[keyword]["locationAttributes"] if attribute in element.attrib]
breadcrumb = "({attributes})".format(attributes=", ".join(attributes))
if "context" in element.attrib:
- context = unicode(element.get("context"))
+ context = str(element.get("context"))
elif "tagAsContext" in self.keywords[keyword]:
context = keyword
elif "customContext" in self.keywords[keyword]:
@@ -442,9 +438,9 @@
for splitText in element.text.split():
# split on whitespace is used for token lists, there, a leading '-' means the token has to be removed, so it's not to be processed here either
if splitText[0] != "-":
- yield unicode(splitText), None, context, breadcrumb, position, comments
+ yield str(splitText), None, context, breadcrumb, position, comments
else:
- yield unicode(element.text), None, context, breadcrumb, position, comments
+ yield str(element.text), None, context, breadcrumb, position, comments
# Hack from http://stackoverflow.com/a/2819788
Index: source/tools/i18n/extractors/jslexer.py
===================================================================
--- source/tools/i18n/extractors/jslexer.py
+++ source/tools/i18n/extractors/jslexer.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
# Copyright (C) 2008-2011 Edgewall Software
# Copyright (C) 2013-2014 Wildfire Games.
# All rights reserved.
@@ -31,8 +29,6 @@
extractor.
"""
-from __future__ import absolute_import, division, print_function, unicode_literals
-
from operator import itemgetter
import re
@@ -128,7 +124,7 @@
escaped_value = escaped.group()
if len(escaped_value) == 4:
try:
- add(unichr(int(escaped_value, 16)))
+ add(chr(int(escaped_value, 16)))
except ValueError:
pass
else:
Index: source/tools/i18n/pullTranslations.py
===================================================================
--- source/tools/i18n/pullTranslations.py
+++ source/tools/i18n/pullTranslations.py
@@ -1,7 +1,6 @@
-#!/usr/bin/env python2
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python3
#
-# Copyright (C) 2014 Wildfire Games.
+# Copyright (C) 2020 Wildfire Games.
# This file is part of 0 A.D.
#
# 0 A.D. is free software: you can redistribute it and/or modify
@@ -17,29 +16,12 @@
# You should have received a copy of the GNU General Public License
# along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
-"""
- Although this script itself should work with both Python 2 and Python 3, it relies on the Transifex Client, which at
- this moment (2014-10-23) does not support Python 3 in the latest stable release (0.10).
-
- As soon as Transifex Client supports Python 3, simply updating its folder should be enough to make this script work
- with Python 3 as well.
-"""
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-
import os, sys
-# Python version check.
-if sys.version_info[0] != 2:
- print(__doc__)
- sys.exit()
-
from txclib.project import Project
def main():
-
-
l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
l10nFolderName = "l10n"
Index: source/tools/i18n/requirements.txt
===================================================================
--- /dev/null
+++ source/tools/i18n/requirements.txt
@@ -0,0 +1,2 @@
+babel~=2.6
+lxml~=4.5
Index: source/tools/i18n/updateTemplates.py
===================================================================
--- source/tools/i18n/updateTemplates.py
+++ source/tools/i18n/updateTemplates.py
@@ -1,5 +1,4 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
#
# Copyright (C) 2018 Wildfire Games.
# This file is part of 0 A.D.
@@ -21,18 +20,38 @@
import codecs, datetime, json, os, string, textwrap
-from pology.catalog import Catalog
-from pology.message import Message
-from pology.monitored import Monpair, Monlist
+from babel.messages.catalog import Catalog as BabelCatalog
+from babel.messages.pofile import write_po
from lxml import etree
+import multiprocessing
l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
l10nFolderName = "l10n"
messagesFilename = "messages.json"
+class Catalog(BabelCatalog):
+ """Babel catalog whose MIME headers are Project-Id-Version (from `project`) plus a fixed subset of Babel's."""
+ def __init__(self, *args, project, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._project = project
+
+ @BabelCatalog.mime_headers.getter
+ def mime_headers(self):
+ headers = []
+ for name, value in super().mime_headers:
+ if name in {
+ "PO-Revision-Date",
+ "POT-Creation-Date",
+ "MIME-Version",
+ "Content-Type",
+ "Content-Transfer-Encoding",
+ "Plural-Forms"}:
+ headers.append((name, value))
+
+ return [('Project-Id-Version', self._project)] + headers
def warnAboutUntouchedMods():
"""
@@ -41,7 +60,7 @@
modsRootFolder = os.path.join(projectRootDirectory, "binaries", "data", "mods")
untouchedMods = {}
for modFolder in os.listdir(modsRootFolder):
- if modFolder[0] != "_":
+ if modFolder[0] != "_" and modFolder[0] != '.':
if not os.path.exists(os.path.join(modsRootFolder, modFolder, l10nFolderName)):
untouchedMods[modFolder] = "There is no '{folderName}' folder in the root folder of this mod.".format(folderName=l10nFolderName)
elif not os.path.exists(os.path.join(modsRootFolder, modFolder, l10nFolderName, messagesFilename)):
@@ -60,64 +79,79 @@
""".format(folderName=l10nFolderName, filename=messagesFilename)
))
+def generatePOT(templateSettings, rootPath):
+ if "skip" in templateSettings and templateSettings["skip"] == "yes":
+ return
+
+ print(f'Generating {templateSettings["project"]}')
+
+ inputRootPath = rootPath
+ if "inputRoot" in templateSettings:
+ inputRootPath = os.path.join(rootPath, templateSettings["inputRoot"])
+
+ template = Catalog(
+ header_comment=(
+f"""
+# Translation template for {templateSettings["project"]}.
+# Copyright (C) {datetime.datetime.now().year} {templateSettings["copyrightHolder"]}
+# This file is distributed under the same license as the {templateSettings["project"]} project.
+"""),
+ charset="utf-8",
+ fuzzy=False,
+ creation_date=datetime.datetime.now(),
+ revision_date=datetime.datetime.now(),
+ locale='en',
+ project=templateSettings["project"]
+ )
+
+ for rule in templateSettings["rules"]:
+ if "skip" in rule and rule["skip"] == "yes":
+ continue
+
+ options = rule.get("options", {})
+ extractorClass = getattr(__import__("extractors.extractors", {}, {}, [rule["extractor"]]), rule["extractor"])
+ extractor = extractorClass(inputRootPath, rule["filemasks"], options)
+ formatFlag = None
+ if "format" in options:
+ formatFlag = options["format"]
+ for message, plural, context, location, comments in extractor.run():
+ message_id = (message, plural) if plural else message
+
+ saved_message = template.get(message_id, context) or template.add(
+ id=message_id,
+ context=context,
+ auto_comments=comments,
+ flags=[formatFlag] if formatFlag and message.find("%") != -1 else []
+ )
+ saved_message.locations.append(location)
+ saved_message.flags.discard('python-format')
+
+ write_po(
+ fileobj=open(os.path.join(rootPath, templateSettings["output"]), "wb+"),
+ catalog=template,
+ sort_by_file=True,
+ )
+ print(u"Generated \"{}\" with {} messages.".format(templateSettings["output"], len(template)))
def generateTemplatesForMessagesFile(messagesFilePath):
with open(messagesFilePath, 'r') as fileObject:
settings = json.load(fileObject)
- rootPath = os.path.dirname(messagesFilePath)
-
for templateSettings in settings:
- if "skip" in templateSettings and templateSettings["skip"] == "yes":
- continue
-
- inputRootPath = rootPath
- if "inputRoot" in templateSettings:
- inputRootPath = os.path.join(rootPath, templateSettings["inputRoot"])
-
- template = Catalog(os.path.join(rootPath, templateSettings["output"]), create=True, truncate=True)
- h = template.update_header(
- templateSettings["project"],
- "Translation template for %project.",
- "Copyright (C) {year} {holder}".format(
- year=datetime.datetime.now().year,
- holder=templateSettings["copyrightHolder"]
- ),
- "This file is distributed under the same license as the %project project.",
- plforms="nplurals=2; plural=(n != 1);"
- )
- h.remove_field("Report-Msgid-Bugs-To")
- h.remove_field("Last-Translator")
- h.remove_field("Language-Team")
- h.remove_field("Language")
- h.author = Monlist()
-
- for rule in templateSettings["rules"]:
- if "skip" in rule and rule["skip"] == "yes":
- continue
-
- options = rule.get("options", {})
- extractorClass = getattr(__import__("extractors.extractors", {}, {}, [rule["extractor"]]), rule["extractor"])
- extractor = extractorClass(inputRootPath, rule["filemasks"], options)
- formatFlag = None
- if "format" in options:
- formatFlag = options["format"]
- for message, plural, context, location, comments in extractor.run():
- msg = Message({"msgid": message, "msgid_plural": plural, "msgctxt": context, "auto_comment": comments, "flag": [formatFlag] if formatFlag and string.find(message, "%") != -1 else None, "source": [location]})
- if template.get(msg):
- template.get(msg).source.append(Monpair(location))
- else:
- template.add(msg)
-
- template.set_encoding("utf-8")
- template.sync(fitplural=True)
- print(u"Generated \"{}\" with {} messages.".format(templateSettings["output"], len(template)))
+ multiprocessing.Process(
+ target=generatePOT,
+ args=(templateSettings, os.path.dirname(messagesFilePath))
+ ).start()
def main():
-
- for root, folders, filenames in os.walk(projectRootDirectory):
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--scandir", help="Directory to start scanning for l10n folders in."
+ "Type '.' for current working directory")
+ args = parser.parse_args()
+ for root, folders, filenames in os.walk(args.scandir or projectRootDirectory):
for folder in folders:
if folder == l10nFolderName:
messagesFilePath = os.path.join(root, folder, messagesFilename)