Changeset View
Changeset View
Standalone View
Standalone View
source/tools/i18n/extractors/extractors.py
# -*- coding:utf-8 -*- | |||||
# | |||||
# Copyright (C) 2016 Wildfire Games. | # Copyright (C) 2016 Wildfire Games. | ||||
# All rights reserved. | # All rights reserved. | ||||
# | # | ||||
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the | # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the | ||||
# following conditions are met: | # following conditions are met: | ||||
# | # | ||||
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following | # Redistributions of source code must retain the above copyright notice, this list of conditions and the following | ||||
# disclaimer. | # disclaimer. | ||||
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following | # Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following | ||||
# disclaimer in the documentation and/or other materials provided with the distribution. | # disclaimer in the documentation and/or other materials provided with the distribution. | ||||
# The name of the author may not be used to endorse or promote products derived from this software without specific | # The name of the author may not be used to endorse or promote products derived from this software without specific | ||||
# prior written permission. | # prior written permission. | ||||
# | # | ||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | # THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | ||||
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||||
# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | # AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||||
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
from __future__ import absolute_import, division, print_function, unicode_literals | |||||
import codecs, re, os, sys | import codecs, re, os, sys | ||||
import json as jsonParser | import json as jsonParser | ||||
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING | from tokenize import generate_tokens, COMMENT, NAME, OP, STRING | ||||
from textwrap import dedent | from textwrap import dedent | ||||
def pathmatch(mask, path):
    """ Matches paths to a mask, where the mask supports * and **.

    Paths use / as the separator.

    * matches a non-empty sequence of characters without /.
    ** matches a non-empty sequence of characters without /, optionally
       followed by any number of "/segment" groups (each segment being a
       non-empty sequence of characters without /).

    Every other character of the mask is matched literally.

    :param mask: the mask to match against
    :param path: the path to test
    :return: True iff the whole path matches the mask, False otherwise
    """
    # Splitting on a capturing group keeps the wildcards in the result:
    # even indices hold literal text, odd indices hold "*" or "**".
    pieces = re.split(r"([*][*]?)", mask)
    pattern = []
    for index, piece in enumerate(pieces):
        if index % 2:
            pattern.append("[^/]+")
            if len(piece) == 2:
                pattern.append("(/[^/]+)*")
        else:
            pattern.append(re.escape(piece))
    # fullmatch anchors at the true end of the string; "$" with re.match
    # would also accept a path that ends with a trailing newline.
    return re.fullmatch("".join(pattern), path) is not None
▲ Show 20 Lines • Show All 271 Lines • ▼ Show 20 Lines | def formatBreadcrumbs(breadcrumbs): | ||||
for piece in breadcrumbs[1:]: | for piece in breadcrumbs[1:]: | ||||
if isinstance(piece, int): outputString += "[" + str(piece) + "]" | if isinstance(piece, int): outputString += "[" + str(piece) + "]" | ||||
else: outputString += "." + piece | else: outputString += "." + piece | ||||
return outputString | return outputString | ||||
def extractFromFile(self, filepath):
    """ Extract messages from the UTF-8 encoded file at filepath.

    The file content is handed to self.extractFromString; each
    (message, breadcrumbs) pair it produces is turned into a 6-tuple:
    (message, None, self.context, formatted breadcrumbs, None, self.comments).

    :param filepath: path of the file to read
    """
    # Read everything up front so the file handle is closed before the first
    # yield; otherwise it would stay open for as long as the consumer keeps
    # this generator suspended.
    with open(filepath, "r", encoding="utf-8") as fileObject:
        contents = fileObject.read()
    for message, breadcrumbs in self.extractFromString(contents):
        yield message, None, self.context, self.formatBreadcrumbs(breadcrumbs), None, self.comments
def extractFromString(self, string): | def extractFromString(self, string): | ||||
self.breadcrumbs = [] | self.breadcrumbs = [] | ||||
jsonDocument = jsonParser.loads(string) | jsonDocument = jsonParser.loads(string) | ||||
if isinstance(jsonDocument, list): | if isinstance(jsonDocument, list): | ||||
for message, breadcrumbs in self.parseList(jsonDocument): | for message, breadcrumbs in self.parseList(jsonDocument): | ||||
if message: # Skip empty strings. | if message: # Skip empty strings. | ||||
yield message, breadcrumbs | yield message, breadcrumbs | ||||
Show All 16 Lines | def parseList(self, itemsList): | ||||
yield message, breadcrumbs | yield message, breadcrumbs | ||||
del self.breadcrumbs[-1] | del self.breadcrumbs[-1] | ||||
index += 1 | index += 1 | ||||
def parseDictionary(self, dictionary):
    """ Walk a dictionary and yield (message, breadcrumbs) pairs.

    For a key listed in self.keywords, the value is extracted: a string is
    yielded directly, a list goes through self.extractList and a dictionary
    through self.extractDictionary. For any other key, list and dictionary
    values are searched recursively through self.parseList and
    self.parseDictionary; other values are ignored.

    The yielded breadcrumbs object is self.breadcrumbs itself, which is
    modified as the walk continues, so it must be used (or copied) before
    the next item is requested.

    :param dictionary: the dictionary to walk
    """
    for keyword, value in dictionary.items():
        self.breadcrumbs.append(keyword)
        if keyword in self.keywords:
            if isinstance(value, str):
                yield value, self.breadcrumbs
            elif isinstance(value, list):
                yield from self.extractList(value)
            elif isinstance(value, dict):
                yield from self.extractDictionary(value)
        elif isinstance(value, list):
            yield from self.parseList(value)
        elif isinstance(value, dict):
            yield from self.parseDictionary(value)
        # Leave the breadcrumb trail as it was before this key.
        del self.breadcrumbs[-1]
def extractList(self, itemsList):
    """ Yield (string, breadcrumbs) for every string item of itemsList.

    While an item is being handled its index is the last entry of
    self.breadcrumbs. The yielded breadcrumbs object is self.breadcrumbs
    itself, so it must be used (or copied) before the next item is requested.
    Items that are not strings are skipped.

    :param itemsList: the list whose string items are extracted
    """
    for index, listItem in enumerate(itemsList):
        self.breadcrumbs.append(index)
        if isinstance(listItem, str):
            yield listItem, self.breadcrumbs
        del self.breadcrumbs[-1]
def extractDictionary(self, dictionary):
    """ Yield (string, breadcrumbs) for every string value of dictionary.

    While a value is being handled its key is the last entry of
    self.breadcrumbs. The yielded breadcrumbs object is self.breadcrumbs
    itself, so it must be used (or copied) before the next item is requested.
    Values that are not strings are skipped.

    :param dictionary: the dictionary whose string values are extracted
    """
    for keyword, value in dictionary.items():
        self.breadcrumbs.append(keyword)
        if isinstance(value, str):
            yield value, self.breadcrumbs
        del self.breadcrumbs[-1]
class xml(Extractor): | class xml(Extractor): | ||||
""" Extract messages from XML files. | """ Extract messages from XML files. | ||||
""" | """ | ||||
Show All 24 Lines | def extractFromFile(self, filepath): | ||||
for message, breadcrumbs in jsonExtractor.extractFromString(element.text): | for message, breadcrumbs in jsonExtractor.extractFromString(element.text): | ||||
yield message, None, context, json.formatBreadcrumbs(breadcrumbs), position, comments | yield message, None, context, json.formatBreadcrumbs(breadcrumbs), position, comments | ||||
else: | else: | ||||
breadcrumb = None | breadcrumb = None | ||||
if "locationAttributes" in self.keywords[keyword]: | if "locationAttributes" in self.keywords[keyword]: | ||||
attributes = [element.get(attribute) for attribute in self.keywords[keyword]["locationAttributes"] if attribute in element.attrib] | attributes = [element.get(attribute) for attribute in self.keywords[keyword]["locationAttributes"] if attribute in element.attrib] | ||||
breadcrumb = "({attributes})".format(attributes=", ".join(attributes)) | breadcrumb = "({attributes})".format(attributes=", ".join(attributes)) | ||||
if "context" in element.attrib: | if "context" in element.attrib: | ||||
context = unicode(element.get("context")) | context = str(element.get("context")) | ||||
elif "tagAsContext" in self.keywords[keyword]: | elif "tagAsContext" in self.keywords[keyword]: | ||||
context = keyword | context = keyword | ||||
elif "customContext" in self.keywords[keyword]: | elif "customContext" in self.keywords[keyword]: | ||||
context = self.keywords[keyword]["customContext"] | context = self.keywords[keyword]["customContext"] | ||||
if "comment" in element.attrib: | if "comment" in element.attrib: | ||||
comment = element.get("comment") | comment = element.get("comment") | ||||
comment = u" ".join(comment.split()) # Remove tabs, line breaks and unecessary spaces. | comment = u" ".join(comment.split()) # Remove tabs, line breaks and unecessary spaces. | ||||
comments.append(comment) | comments.append(comment) | ||||
if "splitOnWhitespace" in self.keywords[keyword]: | if "splitOnWhitespace" in self.keywords[keyword]: | ||||
for splitText in element.text.split(): | for splitText in element.text.split(): | ||||
# split on whitespace is used for token lists, there, a leading '-' means the token has to be removed, so it's not to be processed here either | # split on whitespace is used for token lists, there, a leading '-' means the token has to be removed, so it's not to be processed here either | ||||
if splitText[0] != "-": | if splitText[0] != "-": | ||||
yield unicode(splitText), None, context, breadcrumb, position, comments | yield str(splitText), None, context, breadcrumb, position, comments | ||||
else: | else: | ||||
yield unicode(element.text), None, context, breadcrumb, position, comments | yield str(element.text), None, context, breadcrumb, position, comments | ||||
# Hack from http://stackoverflow.com/a/2819788 | # Hack from http://stackoverflow.com/a/2819788 | ||||
class FakeSectionHeader(object): | class FakeSectionHeader(object): | ||||
def __init__(self, fp): | def __init__(self, fp): | ||||
self.fp = fp | self.fp = fp | ||||
self.sechead = '[root]\n' | self.sechead = '[root]\n' | ||||
Show All 26 Lines |
Wildfire Games · Phabricator