Changeset View
Changeset View
Standalone View
Standalone View
source/tools/i18n/checkTranslationsForSpam.py
#!/usr/bin/env python2 | #!/usr/bin/env python2 | ||||
# -*- coding:utf-8 -*- | # -*- coding:utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2014 Wildfire Games. | # Copyright (C) 2018 Wildfire Games. | ||||
Silier: Shouldn't this be 2019? | |||||
# This file is part of 0 A.D. | # This file is part of 0 A.D. | ||||
# | # | ||||
# 0 A.D. is free software: you can redistribute it and/or modify | # 0 A.D. is free software: you can redistribute it and/or modify | ||||
# it under the terms of the GNU General Public License as published by | # it under the terms of the GNU General Public License as published by | ||||
# the Free Software Foundation, either version 2 of the License, or | # the Free Software Foundation, either version 2 of the License, or | ||||
# (at your option) any later version. | # (at your option) any later version. | ||||
# | # | ||||
# 0 A.D. is distributed in the hope that it will be useful, | # 0 A.D. is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | # GNU General Public License for more details. | ||||
# | # | ||||
# You should have received a copy of the GNU General Public License | # You should have received a copy of the GNU General Public License | ||||
# along with 0 A.D. If not, see <http://www.gnu.org/licenses/>. | # along with 0 A.D. If not, see <http://www.gnu.org/licenses/>. | ||||
from __future__ import absolute_import, division, print_function, unicode_literals | from __future__ import absolute_import, division, print_function, unicode_literals | ||||
import codecs, os, re, sys | import codecs, os, re, sys | ||||
from pology.catalog import Catalog | from pology.catalog import Catalog | ||||
from pology.message import Message | from pology.message import Message | ||||
# Command line usage: pass either the path of a single *.pot file or one or
# more language codes to restrict the check to those translations.

# Directory that contains this script.
l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))

# The project root is three directory levels above the tools directory.
projectRootDirectory = os.path.abspath(
    os.path.join(l10nToolsDirectory, *([os.pardir] * 3)))

# Name of the folders that hold the *.pot templates and *.po translations.
l10nFolderName = "l10n"
def checkTranslations(inputFilePath, checkFilters):
    """Run all translation checks for a single translation template.

    Collects the *.po files that sit next to the template and belong to it,
    then checks them for sprintf defects and for spam URLs.

    inputFilePath -- path of the *.pot template file.
    checkFilters  -- if true, only translations whose language code was given
                     on the command line (sys.argv[1:]) are checked; if false,
                     every translation of the template is checked.
    """
    templateFilename = os.path.basename(inputFilePath)
    print(u"Checking", templateFilename)
    templateCatalog = Catalog(inputFilePath)

    # If language codes were specified on the command line, filter by those.
    # An empty list means "no filtering".
    filters = sys.argv[1:] if checkFilters else []

    l10nFolderPath = os.path.dirname(inputFilePath)

    # .pot is one letter longer than .po, but the dot that separates the locale
    # code from the rest of the filename in .po files makes up for that.
    charactersToSkip = len(templateFilename)

    # Template name without the ".pot" extension.
    templateName = templateFilename[:-4]

    # Load existing translation catalogs as [languageCode, path] pairs.
    existingTranslationCatalogs = []
    for filename in os.listdir(l10nFolderPath):
        # Only real translations: skip non-.po files and the "long" strings files.
        if len(filename) <= 3 or not filename.endswith(".po") or filename.startswith("long"):
            continue

        languageCode = filename[:-charactersToSkip]
        if filters and languageCode not in filters:
            continue

        # The second-to-last dot-separated part names the template of the .po file.
        if templateName != filename.split('.')[-2]:
            continue

        existingTranslationCatalogs.append([languageCode, os.path.join(l10nFolderPath, filename)])

    checkTranslationsForSprintfDefects(templateCatalog, existingTranslationCatalogs)
    checkTranslationsForSpam(templateCatalog, existingTranslationCatalogs)
def checkTranslationsForSprintfDefects(templateCatalog, existingTranslationCatalogs):
    """Check every message of every given translation for sprintf defects.

    templateCatalog             -- catalog of the *.pot template.
    existingTranslationCatalogs -- iterable of (languageCode, pofile) pairs.
    """
    for catalogEntry in existingTranslationCatalogs:
        # Only the path is needed here; the language code is ignored.
        pofile = catalogEntry[1]
        translationCatalog = Catalog(pofile)
        for templateMessage in templateCatalog:
            checkTranslationForSprintfDefects(templateMessage, translationCatalog, pofile)
# Matches named sprintf placeholders such as "%(name)s".
sprintfArgumentPattern = re.compile(r"%\([a-zA-Z0-9\-_]*\)s")


def checkTranslationForSprintfDefects(templateMessage, translationCatalog, pofile):
    """Report sprintf arguments of a translation that the template does not know.

    A translated string may only use "%(name)s" arguments that occur in the
    singular template string, the plural template string or one of the
    extracted comments of the template message. At most one error is printed
    per message.

    templateMessage    -- message of the template catalog to check.
    translationCatalog -- catalog holding the translation of that message.
    pofile             -- path of the translation file, used in the report.
    """
    # select_by_key returns an empty list or a list with a single element, see
    # http://pology.nedohodnik.net/doc/api/en_US/pology.catalog.Catalog-class.html#select_by_key
    translationMessage = translationCatalog.select_by_key(templateMessage.msgctxt, templateMessage.msgid)
    if not translationMessage:
        return

    # Collect the sprintf arguments of the singular and plural template strings.
    templateSprintfArgs = set(sprintfArgumentPattern.findall(templateMessage.msgid))
    if templateMessage.msgid_plural:
        templateSprintfArgs.update(sprintfArgumentPattern.findall(templateMessage.msgid_plural))

    # Some sprintf translation comments mention sprintf arguments that are not used
    # by the template string but can be used by the translation.
    if templateMessage.auto_comment:
        for comment in templateMessage.auto_comment:
            templateSprintfArgs.update(sprintfArgumentPattern.findall(comment))

    # Assert that every sprintf argument in the translation is present in any of the template strings.
    for translationString in translationMessage[0].msgstr:
        # An untranslated form has nothing to check, but later forms may still be defective.
        if not translationString:
            continue

        for translationSprintfArg in sprintfArgumentPattern.findall(translationString):
            if translationSprintfArg in templateSprintfArgs:
                continue

            print(u"{}: The sprintf argument {} used by the translation “{}” isn't present in the template string “{}” “{}”".format(
                os.path.basename(pofile),
                translationSprintfArg,
                translationString.replace("\n", "\\n"),
                templateMessage.msgid.replace("\n", "\\n"),
                templateMessage.msgid_plural.replace("\n", "\\n") if templateMessage.msgid_plural else ""))

            # Show at most one error message per translation
            return
def checkTranslationsForSpam(templateCatalog, existingTranslationCatalogs):
    """Report URLs in translations that do not occur in the template string.

    A URL that only shows up in a translation is reported as possible spam.
    For plural messages, URLs that appear in the plural source string but not
    in the singular one are reported as well.

    templateCatalog             -- catalog of the *.pot template.
    existingTranslationCatalogs -- iterable of (languageCode, pofile) pairs.
    """
    urlPattern = re.compile(r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")

    # Check the URLs in translations against the URLs in the translation template.
    for _languageCode, pofile in existingTranslationCatalogs:
        translationCatalog = Catalog(pofile)

        for templateMessage in templateCatalog:
            translationMessage = translationCatalog.select_by_key(templateMessage.msgctxt, templateMessage.msgid)
            if not translationMessage:
                continue

            templateUrls = urlPattern.findall(templateMessage.msgid)

            # Assert that the same URL is used in both the plural and singular forms
            # of the source string.
            if templateMessage.msgid_plural:
                for url in urlPattern.findall(templateMessage.msgid_plural):
                    if url not in templateUrls:
                        print(u"Different URLs in singular and plural source strings for ‘{}’ in ‘{}’".format(
                            templateMessage.msgid,
                            templateCatalog.filename))

            for translationString in translationMessage[0].msgstr:
                for translationUrl in urlPattern.findall(translationString):
                    if translationUrl not in templateUrls:
                        print(u"{}: Found the “{}” URL in the translation, which does not match any of the URLs in the translation template: {}".format(
                            os.path.basename(pofile),
                            translationUrl,
                            u", ".join(templateUrls)))
def checkTranslationFile(fullPath, checkFilters):
    """Check one template file, shutting down silently when interrupted.

    Without the handler a KeyboardInterrupt (Ctrl+C) would end the script with
    an error stack; a silent shutdown is wanted instead. Every other exception
    propagates unchanged.

    fullPath     -- path of the *.pot template file.
    checkFilters -- passed on to checkTranslations.
    """
    try:
        checkTranslations(fullPath, checkFilters)
    except (KeyboardInterrupt, SystemExit):
        sys.exit()
def main():
    """Check a single template given on the command line, or all templates.

    If the command line arguments form the path of an existing file, only that
    file is checked, without language filtering. Otherwise every *.pot file in
    an l10n folder below the project root is checked, and the command line
    arguments are treated as language codes to filter by.
    """
    print(u"\n WARNING: Remember to regenerate the POT files with “updateTemplates.py” before you run this script.\n POT files are not in the repository.\n")

    # TODO: use argparse to give users a nicer interface and a help text.
    if len(sys.argv) > 1:
        # Joining the arguments allows unquoted paths that contain spaces.
        filePath = ' '.join(sys.argv[1:])
        if os.path.isfile(filePath):
            checkTranslationFile(filePath, False)
            return

    foundPots = 0
    for root, folders, filenames in os.walk(projectRootDirectory):
        root = root.decode("utf-8")

        # Templates only live in the l10n folders.
        if os.path.basename(root) != l10nFolderName:
            continue

        for filename in filenames:
            if len(filename) > 4 and filename.endswith(".pot"):
                foundPots += 1
                checkTranslationFile(os.path.join(root, filename), True)

    if foundPots == 0:
        print(u"This script did not work because no ‘.pot’ files were found.")
        print(u"Please, run ‘updateTemplates.py’ to generate the ‘.pot’ files, and run ‘pullTranslations.py’ to pull the latest translations from Transifex.")
        print(u"Then you can run this script to check the translations.")
# Run the checks only when this file is executed as a script.
if __name__ == "__main__":
    main()
Wildfire Games · Phabricator
Shouldn't this be 2019?