Index: source/tools/i18n/checkTranslationsForSpam.py
===================================================================
--- source/tools/i18n/checkTranslationsForSpam.py
+++ source/tools/i18n/checkTranslationsForSpam.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python2
 # -*- coding:utf-8 -*-
 #
-# Copyright (C) 2014 Wildfire Games.
+# Copyright (C) 2018 Wildfire Games.
 # This file is part of 0 A.D.
 #
 # 0 A.D. is free software: you can redistribute it and/or modify
@@ -24,19 +24,20 @@
 from pology.catalog import Catalog
 from pology.message import Message
 
+# One can pass either a *.pot filename or a language from commandline
 
 l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
 projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardir, os.pardir, os.pardir))
 l10nFolderName = "l10n"
 
 
-def checkTranslationsForSpam(inputFilePath):
+def checkTranslations(inputFilePath, checkFilters):
 
-    print(u"Checking", inputFilePath)
+    print(u"Checking", os.path.basename(inputFilePath))
     templateCatalog = Catalog(inputFilePath)
 
     # If language codes were specified on the command line, filter by those.
-    filters = sys.argv[1:]
+    filters = sys.argv[1:] if checkFilters else ""
 
     # Load existing translation catalogs.
     existingTranslationCatalogs = []
@@ -52,6 +53,60 @@
             if os.path.basename(inputFilePath)[:-4] == filename.split('.')[-2]:
                 existingTranslationCatalogs.append([filename[:-charactersToSkip], os.path.join(l10nFolderPath, filename)])
 
+    checkTranslationsForSprintfDefects(templateCatalog, existingTranslationCatalogs)
+    checkTranslationsForSpam(templateCatalog, existingTranslationCatalogs)
+
+def checkTranslationsForSprintfDefects(templateCatalog, existingTranslationCatalogs):
+    """Run the sprintf-argument check for every template message against every translation catalog."""
+
+    for languageCode, pofile in existingTranslationCatalogs:
+        translationCatalog = Catalog(pofile)
+        for templateMessage in templateCatalog:
+            checkTranslationForSprintfDefects(templateMessage, translationCatalog, pofile)
+
+def checkTranslationForSprintfDefects(templateMessage, translationCatalog, pofile):
+    """Print a message if a translation uses a sprintf argument found in neither the template strings nor their comments."""
+
+    # get translation
+    # this returns an empty list or a list with a single element, see
+    # http://pology.nedohodnik.net/doc/api/en_US/pology.catalog.Catalog-class.html#select_by_key
+    translationMessage = translationCatalog.select_by_key(templateMessage.msgctxt, templateMessage.msgid)
+    if not translationMessage:
+        return
+
+    # get sprintf arguments in the singular template
+    sprintfPattern = re.compile(u"%\([a-zA-Z0-9\-_]*\)s")
+
+    templateSprintfArgs = sprintfPattern.findall(templateMessage.msgid)
+
+    if templateMessage.msgid_plural:
+        templateSprintfArgs += sprintfPattern.findall(templateMessage.msgid_plural)
+
+    # Some sprintf translation comments mention sprintf arguments that are not used
+    # by the template string but can be used by the translation
+    if templateMessage.auto_comment:
+        for comment in templateMessage.auto_comment:
+            templateSprintfArgs += sprintfPattern.findall(comment)
+
+    # assert that every sprintf argument in the translation is present in any of the template strings
+    for translationString in translationMessage[0].msgstr:
+        if not translationString:
+            return
+
+        translationSprintfArgs = sprintfPattern.findall(translationString)
+
+        for translationSprintfArg in translationSprintfArgs:
+            if translationSprintfArg not in templateSprintfArgs:
+                print(u"{}: The sprintf argument {} used by the translation “{}” isn't present in the template string “{}” “{}”".format(
+                    os.path.basename(pofile),
+                    translationSprintfArg,
+                    translationString.replace("\n", "\\n"),
+                    templateMessage.msgid.replace("\n", "\\n"),
+                    templateMessage.msgid_plural.replace("\n", "\\n") if templateMessage.msgid_plural else ""))
+                # Show at most one error message per translation
+                return
+
+def checkTranslationsForSpam(templateCatalog, existingTranslationCatalogs):
     urlPattern = re.compile(u"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
 
     # Check the URLs in translations against the URLs in the translation template.
@@ -60,7 +115,6 @@
         for templateMessage in templateCatalog:
             translationMessage = translationCatalog.select_by_key(templateMessage.msgctxt, templateMessage.msgid)
             if translationMessage:
-                templateSingularString = templateMessage.msgid
                 templateUrls = urlPattern.findall(templateMessage.msgid)
                 # Assert that the same URL is used in both the plural and singular forms.
                 if templateMessage.msgid_plural and len(templateMessage.msgstr) > 1:
@@ -75,22 +129,36 @@
                     for translationUrl in translationUrls:
                         if translationUrl not in templateUrls:
                             print(u"{}: Found the “{}” URL in the translation, which does not match any of the URLs in the translation template: {}".format(
-                                languageCode,
+                                os.path.basename(pofile),
                                 translationUrl,
                                 u", ".join(templateUrls)))
 
+def checkTranslationFile(fullPath, checkFilters):
+    """Check one template file; on KeyboardInterrupt or SystemExit, stop the script via sys.exit()."""
+    try:
+        checkTranslations(fullPath, checkFilters)
+    except (KeyboardInterrupt, SystemExit):
+        sys.exit()
+
 
 def main():
 
     print(u"\n WARNING: Remember to regenerate the POT files with “updateTemplates.py” before you run this script.\n POT files are not in the repository.\n")
 
+    if len(sys.argv) > 1:
+        filePath = ' '.join(sys.argv[1:])
+        if os.path.isfile(filePath):
+            checkTranslationFile(filePath, False)
+            return
+
     foundPots = 0
     for root, folders, filenames in os.walk(projectRootDirectory):
         root = root.decode("utf-8")
         for filename in filenames:
             if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == "l10n":
                 foundPots += 1
-                checkTranslationsForSpam(os.path.join(root, filename))
+                checkTranslationFile(os.path.join(root, filename), True)
+
     if foundPots == 0:
         print(u"This script did not work because no ‘.pot’ files were found.")
         print(u"Please, run ‘updateTemplates.py’ to generate the ‘.pot’ files, and run ‘pullTranslations.py’ to pull the latest translations from Transifex.")