Index: ps/trunk/source/tools/i18n/checkTranslationsForSpam.py =================================================================== --- ps/trunk/source/tools/i18n/checkTranslationsForSpam.py (revision 25537) +++ ps/trunk/source/tools/i18n/checkTranslationsForSpam.py (nonexistent) @@ -1,91 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (C) 2020 Wildfire Games. -# This file is part of 0 A.D. -# -# 0 A.D. is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 2 of the License, or -# (at your option) any later version. -# -# 0 A.D. is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with 0 A.D. If not, see . - -import os, re, sys -import multiprocessing - -from i18n_helper import l10nToolsDirectory, projectRootDirectory -from i18n_helper.catalog import Catalog -from i18n_helper.globber import getCatalogs - -l10nFolderName = "l10n" - -def checkTranslationsForSpam(inputFilePath): - print(f"Checking {inputFilePath}") - templateCatalog = Catalog.readFrom() - - # If language codes were specified on the command line, filter by those. - filters = sys.argv[1:] - - # Load existing translation catalogs. - existingTranslationCatalogs = getCatalogs(inputFilePath, filters) - - urlPattern = re.compile(r"https?://(?:[a-z0-9-_$@./&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", re.IGNORECASE) - - # Check that there are no spam URLs. - # Loop through all messages in the .POT catalog for URLs. - # For each, check for the corresponding key in the .PO catalogs. - # If found, check that URLS in the .PO keys are the same as those in the .POT key. - for templateMessage in templateCatalog: - templateUrls = set(urlPattern.findall( - templateMessage.id[0] if templateMessage.pluralizable else templateMessage.id - )) - # As a sanity check, verify that the template message is coherent - if templateMessage.pluralizable: - pluralUrls = set(urlPattern.findall(templateMessage.id[1])) - if pluralUrls.difference(templateUrls): - print(f"{inputFilePath} - Different URLs in singular and plural source strings " - f"for '{templateMessage}' in '{inputFilePath}'") - - for translationCatalog in existingTranslationCatalogs: - translationMessage = translationCatalog.get(templateMessage.id, templateMessage.context) - if not translationMessage: - continue - - translationUrls = set(urlPattern.findall( - translationMessage.string[0] if translationMessage.pluralizable else translationMessage.string - )) - unknown_urls = translationUrls.difference(templateUrls) - if unknown_urls: - print(f'{inputFilePath} - {translationCatalog.locale}: ' - f'Found unknown URL(s) {", ".join(unknown_urls)} in the translation ' - f'which do not match any of the URLs in the template: {", ".join(templateUrls)}') - print(f"Done checking {inputFilePath}") - -def main(): - print("\n\tWARNING: Remember to regenerate the POT files with “updateTemplates.py” " - "before you run this script.\n\tPOT files are not in the repository.\n") - foundPots = 0 - for root, folders, filenames in os.walk(projectRootDirectory): - for filename in filenames: - if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == "l10n": - foundPots += 1 - multiprocessing.Process( - target=checkTranslationsForSpam, - args=(os.path.join(root, filename), ) - ).start() - if foundPots == 0: - print( - "This script did not work because no '.pot' files were found. " - "Please run 'updateTemplates.py' to generate the '.pot' files, " - "and run 'pullTranslations.py' to pull the latest translations from Transifex. " - "Then you can run this script to check for spam in translations.") - - -if __name__ == "__main__": - main() Property changes on: ps/trunk/source/tools/i18n/checkTranslationsForSpam.py ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Index: ps/trunk/source/tools/i18n/checkTranslations.py =================================================================== --- ps/trunk/source/tools/i18n/checkTranslations.py (nonexistent) +++ ps/trunk/source/tools/i18n/checkTranslations.py (revision 25538) @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2021 Wildfire Games. +# This file is part of 0 A.D. +# +# 0 A.D. is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# 0 A.D. is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with 0 A.D. If not, see . + +import os +import re +import sys +import multiprocessing + +from i18n_helper import projectRootDirectory +from i18n_helper.catalog import Catalog +from i18n_helper.globber import getCatalogs + +l10nFolderName = "l10n" + +VERBOSE = 0 + + +class MessageChecker: + """Checks all messages in a catalog against a regex.""" + def __init__(self, human_name, regex): + self.regex = re.compile(regex, re.IGNORECASE) + self.human_name = human_name + + def check(self, inputFilePath, templateMessage, translatedCatalogs): + patterns = set(self.regex.findall( + templateMessage.id[0] if templateMessage.pluralizable else templateMessage.id + )) + + # As a sanity check, verify that the template message is coherent. + # Note that these tend to be false positives. + # TODO: the pssible tags are usually comments, we ought be able to find them. + if templateMessage.pluralizable: + pluralUrls = set(self.regex.findall(templateMessage.id[1])) + if pluralUrls.difference(patterns): + print(f"{inputFilePath} - Different {self.human_name} in singular and plural source strings " + f"for '{templateMessage}' in '{inputFilePath}'") + + for translationCatalog in translatedCatalogs: + translationMessage = translationCatalog.get( + templateMessage.id, templateMessage.context) + if not translationMessage: + continue + + translatedPatterns = set(self.regex.findall( + translationMessage.string[0] if translationMessage.pluralizable else translationMessage.string + )) + unknown_patterns = translatedPatterns.difference(patterns) + if unknown_patterns: + print(f'{inputFilePath} - {translationCatalog.locale}: ' + f'Found unknown {self.human_name} {", ".join(["`" + x + "`" for x in unknown_patterns])} in the translation ' + f'which do not match any of the URLs in the template: {", ".join(["`" + x + "`" for x in patterns])}') + + +def check_translations(inputFilePath): + if VERBOSE: + print(f"Checking {inputFilePath}") + templateCatalog = Catalog.readFrom(inputFilePath) + + # If language codes were specified on the command line, filter by those. + filters = sys.argv[1:] + + # Load existing translation catalogs. + existingTranslationCatalogs = getCatalogs(inputFilePath, filters) + + spam = MessageChecker("url", r"https?://(?:[a-z0-9-_$@./&+]|(?:%[0-9a-fA-F][0-9a-fA-F]))+") + sprintf = MessageChecker("sprintf", r"%\([^)]+\)s") + tags = MessageChecker("tag", r"[^\\][^\\](\[[^]]+/?\])") + + # Check that there are no spam URLs. + # Loop through all messages in the .POT catalog for URLs. + # For each, check for the corresponding key in the .PO catalogs. + # If found, check that URLS in the .PO keys are the same as those in the .POT key. + for templateMessage in templateCatalog: + spam.check(inputFilePath, templateMessage, existingTranslationCatalogs) + sprintf.check(inputFilePath, templateMessage, existingTranslationCatalogs) + tags.check(inputFilePath, templateMessage, existingTranslationCatalogs) + + if VERBOSE: + print(f"Done checking {inputFilePath}") + + +def main(): + print("\n\tWARNING: Remember to regenerate the POT files with “updateTemplates.py” " + "before you run this script.\n\tPOT files are not in the repository.\n") + foundPots = 0 + for root, folders, filenames in os.walk(projectRootDirectory): + for filename in filenames: + if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == "l10n": + foundPots += 1 + multiprocessing.Process( + target=check_translations, + args=(os.path.join(root, filename), ) + ).start() + if foundPots == 0: + print( + "This script did not work because no '.pot' files were found. " + "Please run 'updateTemplates.py' to generate the '.pot' files, " + "and run 'pullTranslations.py' to pull the latest translations from Transifex. " + "Then you can run this script to check for spam in translations.") + + +if __name__ == "__main__": + main() Property changes on: ps/trunk/source/tools/i18n/checkTranslations.py ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property