Differential D2757 Diff 14231 source/tools/i18n/extractors/extractors.py

Changeset View

Standalone View

source/tools/i18n/extractors/extractors.py

# -*- coding:utf-8 -*-
#
# Copyright (C) 2016 Wildfire Games.		# Copyright (C) 2016 Wildfire Games.
# All rights reserved.		# All rights reserved.
#		#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the		# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
# following conditions are met:		# following conditions are met:
#		#
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following		# Redistributions of source code must retain the above copyright notice, this list of conditions and the following
# disclaimer.		# disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following		# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with the distribution.		# disclaimer in the documentation and/or other materials provided with the distribution.
# The name of the author may not be used to endorse or promote products derived from this software without specific		# The name of the author may not be used to endorse or promote products derived from this software without specific
# prior written permission.		# prior written permission.
#		#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,		# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE		# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT		# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)		# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR		# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.		# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import, division, print_function, unicode_literals

import codecs, re, os, sys		import codecs, re, os, sys
import json as jsonParser		import json as jsonParser

from tokenize import generate_tokens, COMMENT, NAME, OP, STRING		from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
from textwrap import dedent		from textwrap import dedent

def pathmatch(mask, path):
    """Return whether ``path`` matches the wildcard ``mask``.

    Paths use ``/`` as the separator.

    * ``*`` matches one or more characters other than ``/``.
    * ``**`` matches like ``*`` and may additionally continue across any
      number of further ``/``-separated, non-empty components.

    Every other character of the mask is matched literally.

    :param mask: pattern that may contain the ``*`` and ``**`` wildcards
    :param path: ``/``-separated path to test against the mask
    :return: True iff path matches the mask, False otherwise
    """
    # Splitting on a capturing group keeps the wildcards in the result:
    # even indices hold literal text, odd indices hold "*" or "**".
    pieces = re.split(r"([*][*]?)", mask)
    pattern = []
    for position, piece in enumerate(pieces):
        if position % 2:
            pattern.append("[^/]+")
            if len(piece) == 2:
                # "**" may also swallow additional path components.
                pattern.append("(/[^/]+)*")
        else:
            pattern.append(re.escape(piece))
    pattern.append("$")
    return re.match("".join(pattern), path) is not None
▲ Show 20 Lines • Show All 271 Lines • ▼ Show 20 Lines	def formatBreadcrumbs(breadcrumbs):
for piece in breadcrumbs[1:]:		for piece in breadcrumbs[1:]:
if isinstance(piece, int): outputString += "[" + str(piece) + "]"		if isinstance(piece, int): outputString += "[" + str(piece) + "]"
else: outputString += "." + piece		else: outputString += "." + piece
return outputString		return outputString

def extractFromFile(self, filepath):
    """Yield one message tuple per string extracted from a JSON file.

    The file is read as UTF-8 and its whole content is handed to
    ``self.extractFromString``.

    :param filepath: path of the JSON file to read
    :return: generator of ``(message, None, context, breadcrumbs, None, comments)``
        tuples, where ``context`` and ``comments`` come from ``self`` and
        ``breadcrumbs`` is the result of ``self.formatBreadcrumbs``. The
        fifth slot is where the XML extractor passes a position; here it
        is always None.
    """
    # Read everything up front so the file handle is closed before the
    # first yield instead of staying open while the generator is suspended.
    with codecs.open(filepath, "r", 'utf-8') as fileObject:
        contents = fileObject.read()
    for message, breadcrumbs in self.extractFromString(contents):
        yield message, None, self.context, self.formatBreadcrumbs(breadcrumbs), None, self.comments

def extractFromString(self, string):		def extractFromString(self, string):
self.breadcrumbs = []		self.breadcrumbs = []
jsonDocument = jsonParser.loads(string)		jsonDocument = jsonParser.loads(string)
if isinstance(jsonDocument, list):		if isinstance(jsonDocument, list):
for message, breadcrumbs in self.parseList(jsonDocument):		for message, breadcrumbs in self.parseList(jsonDocument):
if message: # Skip empty strings.		if message: # Skip empty strings.
yield message, breadcrumbs		yield message, breadcrumbs
Show All 16 Lines	def parseList(self, itemsList):
yield message, breadcrumbs		yield message, breadcrumbs
del self.breadcrumbs[-1]		del self.breadcrumbs[-1]
index += 1		index += 1

def parseDictionary(self, dictionary):
    """Walk a JSON object and yield the strings found under known keywords.

    For every key of ``dictionary``:

    * if the key is in ``self.keywords``, a string value is yielded
      directly, a list value is delegated to ``self.extractList`` and a
      dict value to ``self.extractDictionary``;
    * otherwise a list value is searched further with ``self.parseList``
      and a dict value with ``self.parseDictionary``.

    :param dictionary: decoded JSON object to inspect
    :return: generator of ``(message, breadcrumbs)`` pairs. ``breadcrumbs``
        is the live ``self.breadcrumbs`` list, which is modified again as
        soon as the generator resumes, so it must be used or copied before
        the next item is requested.
    """
    for keyword, value in dictionary.items():
        self.breadcrumbs.append(keyword)
        if keyword in self.keywords:
            if isinstance(value, str):
                yield value, self.breadcrumbs
            elif isinstance(value, list):
                for message, breadcrumbs in self.extractList(value):
                    yield message, breadcrumbs
            elif isinstance(value, dict):
                for message, breadcrumbs in self.extractDictionary(value):
                    yield message, breadcrumbs
        elif isinstance(value, list):
            for message, breadcrumbs in self.parseList(value):
                yield message, breadcrumbs
        elif isinstance(value, dict):
            for message, breadcrumbs in self.parseDictionary(value):
                yield message, breadcrumbs
        # Leave the trail exactly as it was before this key was visited.
        del self.breadcrumbs[-1]

def extractList(self, itemsList):
    """Yield every string that is a direct element of ``itemsList``.

    Non-string elements are skipped; nested containers are not searched.

    :param itemsList: decoded JSON array to inspect
    :return: generator of ``(message, breadcrumbs)`` pairs. ``breadcrumbs``
        is the live ``self.breadcrumbs`` list with the element's index
        appended; the index is removed again once the generator resumes.
    """
    for index, listItem in enumerate(itemsList):
        self.breadcrumbs.append(index)
        if isinstance(listItem, str):
            yield listItem, self.breadcrumbs
        del self.breadcrumbs[-1]

def extractDictionary(self, dictionary):
    """Yield every string that is a direct value of ``dictionary``.

    Non-string values are skipped; nested containers are not searched.

    :param dictionary: decoded JSON object to inspect
    :return: generator of ``(message, breadcrumbs)`` pairs. ``breadcrumbs``
        is the live ``self.breadcrumbs`` list with the value's key
        appended; the key is removed again once the generator resumes.
    """
    for keyword, value in dictionary.items():
        self.breadcrumbs.append(keyword)
        if isinstance(value, str):
            yield value, self.breadcrumbs
        del self.breadcrumbs[-1]



class xml(Extractor):		class xml(Extractor):
""" Extract messages from XML files.		""" Extract messages from XML files.
"""		"""
Show All 24 Lines	def extractFromFile(self, filepath):
for message, breadcrumbs in jsonExtractor.extractFromString(element.text):		for message, breadcrumbs in jsonExtractor.extractFromString(element.text):
yield message, None, context, json.formatBreadcrumbs(breadcrumbs), position, comments		yield message, None, context, json.formatBreadcrumbs(breadcrumbs), position, comments
else:		else:
breadcrumb = None		breadcrumb = None
if "locationAttributes" in self.keywords[keyword]:		if "locationAttributes" in self.keywords[keyword]:
attributes = [element.get(attribute) for attribute in self.keywords[keyword]["locationAttributes"] if attribute in element.attrib]		attributes = [element.get(attribute) for attribute in self.keywords[keyword]["locationAttributes"] if attribute in element.attrib]
breadcrumb = "({attributes})".format(attributes=", ".join(attributes))		breadcrumb = "({attributes})".format(attributes=", ".join(attributes))
if "context" in element.attrib:		if "context" in element.attrib:
context = unicode(element.get("context"))		context = str(element.get("context"))
elif "tagAsContext" in self.keywords[keyword]:		elif "tagAsContext" in self.keywords[keyword]:
context = keyword		context = keyword
elif "customContext" in self.keywords[keyword]:		elif "customContext" in self.keywords[keyword]:
context = self.keywords[keyword]["customContext"]		context = self.keywords[keyword]["customContext"]
if "comment" in element.attrib:		if "comment" in element.attrib:
comment = element.get("comment")		comment = element.get("comment")
comment = u" ".join(comment.split()) # Remove tabs, line breaks and unecessary spaces.		comment = u" ".join(comment.split()) # Remove tabs, line breaks and unecessary spaces.
comments.append(comment)		comments.append(comment)
if "splitOnWhitespace" in self.keywords[keyword]:		if "splitOnWhitespace" in self.keywords[keyword]:
for splitText in element.text.split():		for splitText in element.text.split():
# Splitting on whitespace is used for token lists. There, a leading '-' means the token is to be removed, so it is not processed here either.		# Splitting on whitespace is used for token lists. There, a leading '-' means the token is to be removed, so it is not processed here either.
if splitText[0] != "-":		if splitText[0] != "-":
yield unicode(splitText), None, context, breadcrumb, position, comments		yield str(splitText), None, context, breadcrumb, position, comments
else:		else:
yield unicode(element.text), None, context, breadcrumb, position, comments		yield str(element.text), None, context, breadcrumb, position, comments


# Hack from http://stackoverflow.com/a/2819788		# Hack from http://stackoverflow.com/a/2819788
class FakeSectionHeader(object):		class FakeSectionHeader(object):

def __init__(self, fp):		def __init__(self, fp):
self.fp = fp		self.fp = fp
self.sechead = '[root]\n'		self.sechead = '[root]\n'
Show All 26 Lines