author     V3n3RiX <venerix@koprulu.sector>   2022-03-20 00:40:44 +0000
committer  V3n3RiX <venerix@koprulu.sector>   2022-03-20 00:40:44 +0000
commit     4cbcc855382a06088e2f016f62cafdbcb7e40665 (patch)
tree       356496503d52354aa6d9f2d36126302fed5f3a73 /app-i18n/mozc/files/mozc-2.23.2815.102-python-3_4.patch
parent     fcc5224904648a8e6eb528d7603154160a20022f (diff)
gentoo resync : 20.03.2022
Diffstat (limited to 'app-i18n/mozc/files/mozc-2.23.2815.102-python-3_4.patch')
-rw-r--r--  app-i18n/mozc/files/mozc-2.23.2815.102-python-3_4.patch  537
1 file changed, 0 insertions, 537 deletions
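The removed patch (upstream report: https://github.com/google/mozc/issues/462) ported Mozc's build-time Python scripts from Python 2 to Python 3: binary-mode file handles with bytes literals for UTF-8 data, dict iteration via items() instead of iteritems()/iterkeys(), print() as a function, and key-based sorting in place of cmp-style comparators. A short, hedged sketch of these patterns appears after the diff.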
diff --git a/app-i18n/mozc/files/mozc-2.23.2815.102-python-3_4.patch b/app-i18n/mozc/files/mozc-2.23.2815.102-python-3_4.patch
deleted file mode 100644
index 41d2bf9eeb90..000000000000
--- a/app-i18n/mozc/files/mozc-2.23.2815.102-python-3_4.patch
+++ /dev/null
@@ -1,537 +0,0 @@
-https://github.com/google/mozc/issues/462
-
---- /src/prediction/gen_zero_query_data.py
-+++ /src/prediction/gen_zero_query_data.py
-@@ -59,20 +59,20 @@
- Returns:
- An integer indicating parsed pua.
- """
-- if not s or s[0] == '>':
-+ if not s or s[0:1] == b'>':
- return 0
- return int(s, 16)
-
-
- def NormalizeString(string):
- return unicodedata.normalize(
-- 'NFKC', string.decode('utf-8')).encode('utf-8').replace('~', '〜')
-+ 'NFKC', string.decode('utf-8')).replace('~', '〜').encode('utf-8')
-
-
- def RemoveTrailingNumber(string):
- if not string:
-- return ''
-- return re.sub(r'^([^0-9]+)[0-9]+$', r'\1', string)
-+ return b''
-+ return re.sub(br'^([^0-9]+)[0-9]+$', br'\1', string)
-
-
- def GetReadingsFromDescription(description):
-@@ -84,19 +84,19 @@
- # - ビル・建物
- # \xE3\x83\xBB : "・"
- return [RemoveTrailingNumber(token) for token
-- in re.split(r'(?:\(|\)|/|\xE3\x83\xBB)+', normalized)]
-+ in re.split(br'(?:\(|\)|/|\xE3\x83\xBB)+', normalized)]
-
-
- def ReadEmojiTsv(stream):
- """Reads emoji data from stream and returns zero query data."""
- zero_query_dict = defaultdict(list)
- stream = code_generator_util.SkipLineComment(stream)
-- for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
-+ for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
- if len(columns) != 13:
-- logging.critical('format error: %s', '\t'.join(columns))
-+ logging.critical('format error: %s', b'\t'.join(columns))
- sys.exit(1)
-
-- code_points = columns[0].split(' ')
-+ code_points = columns[0].split(b' ')
-
- # Emoji code point.
- emoji = columns[1]
-@@ -114,12 +114,12 @@
- # - Composite emoji which has multiple code point.
- # NOTE: Some Unicode 6.0 emoji don't have PUA, and it is also omitted.
- # TODO(hsumita): Check the availability of such emoji and enable it.
-- logging.info('Skip %s', ' '.join(code_points))
-+ logging.info('Skip %s', b' '.join(code_points))
- continue
-
- reading_list = []
- # \xe3\x80\x80 is a full-width space
-- for reading in re.split(r'(?: |\xe3\x80\x80)+', NormalizeString(readings)):
-+ for reading in re.split(br'(?: |\xe3\x80\x80)+', NormalizeString(readings)):
- if not reading:
- continue
- reading_list.append(reading)
-@@ -158,15 +158,15 @@
- zero_query_dict = defaultdict(list)
-
- for line in input_stream:
-- if line.startswith('#'):
-+ if line.startswith(b'#'):
- continue
-- line = line.rstrip('\r\n')
-+ line = line.rstrip(b'\r\n')
- if not line:
- continue
-
-- tokens = line.split('\t')
-+ tokens = line.split(b'\t')
- key = tokens[0]
-- values = tokens[1].split(',')
-+ values = tokens[1].split(b',')
-
- for value in values:
- zero_query_dict[key].append(
-@@ -179,16 +179,16 @@
- """Reads emoticon data from stream and returns zero query data."""
- zero_query_dict = defaultdict(list)
- stream = code_generator_util.SkipLineComment(stream)
-- for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
-+ for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
- if len(columns) != 3:
-- logging.critical('format error: %s', '\t'.join(columns))
-+ logging.critical('format error: %s', b'\t'.join(columns))
- sys.exit(1)
-
- emoticon = columns[0]
- readings = columns[2]
-
- # \xe3\x80\x80 is a full-width space
-- for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
-+ for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
- if not reading:
- continue
- zero_query_dict[reading].append(
-@@ -202,9 +202,9 @@
- """Reads emoji data from stream and returns zero query data."""
- zero_query_dict = defaultdict(list)
- stream = code_generator_util.SkipLineComment(stream)
-- for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
-+ for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
- if len(columns) < 3:
-- logging.warning('format error: %s', '\t'.join(columns))
-+ logging.warning('format error: %s', b'\t'.join(columns))
- continue
-
- symbol = columns[1]
-@@ -222,7 +222,7 @@
- continue
-
- # \xe3\x80\x80 is a full-width space
-- for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
-+ for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
- if not reading:
- continue
- zero_query_dict[reading].append(
-@@ -247,7 +247,7 @@
-
- def IsValidKeyForZeroQuery(key):
- """Returns if the key is valid for zero query trigger."""
-- is_ascii = all(ord(char) < 128 for char in key)
-+ is_ascii = all(char < 128 for char in key)
- return not is_ascii
-
-
-@@ -301,13 +301,13 @@
-
- def main():
- options = ParseOptions()
-- with open(options.input_rule, 'r') as input_stream:
-+ with open(options.input_rule, 'rb') as input_stream:
- zero_query_rule_dict = ReadZeroQueryRuleData(input_stream)
-- with open(options.input_symbol, 'r') as input_stream:
-+ with open(options.input_symbol, 'rb') as input_stream:
- zero_query_symbol_dict = ReadSymbolTsv(input_stream)
-- with open(options.input_emoji, 'r') as input_stream:
-+ with open(options.input_emoji, 'rb') as input_stream:
- zero_query_emoji_dict = ReadEmojiTsv(input_stream)
-- with open(options.input_emoticon, 'r') as input_stream:
-+ with open(options.input_emoticon, 'rb') as input_stream:
- zero_query_emoticon_dict = ReadEmoticonTsv(input_stream)
-
- merged_zero_query_dict = MergeZeroQueryData(
---- /src/prediction/gen_zero_query_number_data.py
-+++ /src/prediction/gen_zero_query_number_data.py
-@@ -41,15 +41,15 @@
- zero_query_dict = defaultdict(list)
-
- for line in input_stream:
-- if line.startswith('#'):
-+ if line.startswith(b'#'):
- continue
-- line = line.rstrip('\r\n')
-+ line = line.rstrip(b'\r\n')
- if not line:
- continue
-
-- tokens = line.split('\t')
-+ tokens = line.split(b'\t')
- key = tokens[0]
-- values = tokens[1].split(',')
-+ values = tokens[1].split(b',')
-
- for value in values:
- zero_query_dict[key].append(
-@@ -71,7 +71,7 @@
-
- def main():
- options = ParseOption()
-- with open(options.input, 'r') as input_stream:
-+ with open(options.input, 'rb') as input_stream:
- zero_query_dict = ReadZeroQueryNumberData(input_stream)
- util.WriteZeroQueryData(zero_query_dict,
- options.output_token_array,
---- /src/prediction/gen_zero_query_util.py
-+++ /src/prediction/gen_zero_query_util.py
-@@ -69,7 +69,7 @@
- output_string_array):
- # Collect all the strings and assign indices in ascending order
- string_index = {}
-- for key, entry_list in zero_query_dict.iteritems():
-+ for key, entry_list in zero_query_dict.items():
- string_index[key] = 0
- for entry in entry_list:
- string_index[entry.value] = 0
---- /src/rewriter/gen_counter_suffix_array.py
-+++ /src/rewriter/gen_counter_suffix_array.py
-@@ -43,7 +43,7 @@
- with codecs.open(id_file, 'r', encoding='utf-8') as stream:
- stream = code_generator_util.ParseColumnStream(stream, num_column=2)
- for pos_id, pos_name in stream:
-- if pos_name.startswith(u'名詞,接尾,助数詞'):
-+ if pos_name.startswith('名詞,接尾,助数詞'):
- pos_ids.add(pos_id)
- return pos_ids
-
---- /src/rewriter/gen_emoji_rewriter_data.py
-+++ /src/rewriter/gen_emoji_rewriter_data.py
-@@ -74,19 +74,19 @@
- the glyph (in other words, it has alternative (primary) code point, which
- doesn't lead '>' and that's why we'll ignore it).
- """
-- if not s or s[0] == '>':
-+ if not s or s[0:1] == b'>':
- return None
- return int(s, 16)
-
-
--_FULLWIDTH_RE = re.compile(ur'[！-～]') # U+FF01 - U+FF5E
-+_FULLWIDTH_RE = re.compile(r'[！-～]') # U+FF01 - U+FF5E
-
-
- def NormalizeString(string):
- """Normalize full width ascii characters to half width characters."""
-- offset = ord(u'Ａ') - ord(u'A')
-- return _FULLWIDTH_RE.sub(lambda x: unichr(ord(x.group(0)) - offset),
-- unicode(string, 'utf-8')).encode('utf-8')
-+ offset = ord('Ａ') - ord('A')
-+ return _FULLWIDTH_RE.sub(lambda x: chr(ord(x.group(0)) - offset),
-+ string.decode('utf-8')).encode('utf-8')
-
-
- def ReadEmojiTsv(stream):
-@@ -96,14 +96,14 @@
- token_dict = defaultdict(list)
-
- stream = code_generator_util.SkipLineComment(stream)
-- for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
-+ for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
- if len(columns) != 13:
-- logging.critical('format error: %s', '\t'.join(columns))
-+ logging.critical('format error: %s', b'\t'.join(columns))
- sys.exit(1)
-
-- code_points = columns[0].split(' ')
-+ code_points = columns[0].split(b' ')
- # Emoji code point.
-- emoji = columns[1] if columns[1] else ''
-+ emoji = columns[1] if columns[1] else b''
- android_pua = ParseCodePoint(columns[2])
- docomo_pua = ParseCodePoint(columns[3])
- softbank_pua = ParseCodePoint(columns[4])
-@@ -112,10 +112,10 @@
- readings = columns[6]
-
- # [7]: Name defined in Unicode. It is ignored in current implementation.
-- utf8_description = columns[8] if columns[8] else ''
-- docomo_description = columns[9] if columns[9] else ''
-- softbank_description = columns[10] if columns[10] else ''
-- kddi_description = columns[11] if columns[11] else ''
-+ utf8_description = columns[8] if columns[8] else b''
-+ docomo_description = columns[9] if columns[9] else b''
-+ softbank_description = columns[10] if columns[10] else b''
-+ kddi_description = columns[11] if columns[11] else b''
-
- if not android_pua or len(code_points) > 1:
- # Skip some emoji, which is not supported on old devices.
-@@ -123,7 +123,7 @@
- # - Composite emoji which has multiple code point.
- # NOTE: Some Unicode 6.0 emoji don't have PUA, and it is also omitted.
- # TODO(hsumita): Check the availability of such emoji and enable it.
-- logging.info('Skip %s', ' '.join(code_points))
-+ logging.info('Skip %s', b' '.join(code_points))
- continue
-
- # Check consistency between carrier PUA codes and descriptions for Android
-@@ -132,7 +132,7 @@
- (bool(softbank_pua) != bool(softbank_description)) or
- (bool(kddi_pua) != bool(kddi_description))):
- logging.warning('carrier PUA and description conflict: %s',
-- '\t'.join(columns))
-+ b'\t'.join(columns))
- continue
-
- # Check if the character is usable on Android.
-@@ -140,7 +140,7 @@
- android_pua = 0 # Replace None with 0.
-
- if not emoji and not android_pua:
-- logging.info('Skip: %s', '\t'.join(columns))
-+ logging.info('Skip: %s', b'\t'.join(columns))
- continue
-
- index = len(emoji_data_list)
-@@ -149,7 +149,7 @@
- kddi_description))
-
- # \xe3\x80\x80 is a full-width space
-- for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
-+ for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
- if reading:
- token_dict[NormalizeString(reading)].append(index)
-
-@@ -159,7 +159,7 @@
- def OutputData(emoji_data_list, token_dict,
- token_array_file, string_array_file):
- """Output token and string arrays to files."""
-- sorted_token_dict = sorted(token_dict.iteritems())
-+ sorted_token_dict = sorted(token_dict.items())
-
- strings = {}
- for reading, _ in sorted_token_dict:
-@@ -171,7 +171,7 @@
- strings[docomo_description] = 0
- strings[softbank_description] = 0
- strings[kddi_description] = 0
-- sorted_strings = sorted(strings.iterkeys())
-+ sorted_strings = sorted(strings.keys())
- for index, s in enumerate(sorted_strings):
- strings[s] = index
-
-@@ -205,7 +205,7 @@
-
- def main():
- options = ParseOptions()
-- with open(options.input, 'r') as input_stream:
-+ with open(options.input, 'rb') as input_stream:
- (emoji_data_list, token_dict) = ReadEmojiTsv(input_stream)
-
- OutputData(emoji_data_list, token_dict,
---- /src/rewriter/gen_reading_correction_data.py
-+++ /src/rewriter/gen_reading_correction_data.py
-@@ -63,7 +63,7 @@
- def WriteData(input_path, output_value_array_path, output_error_array_path,
- output_correction_array_path):
- outputs = []
-- with open(input_path) as input_stream:
-+ with open(input_path, 'rb') as input_stream:
- input_stream = code_generator_util.SkipLineComment(input_stream)
- input_stream = code_generator_util.ParseColumnStream(input_stream,
- num_column=3)
-@@ -73,7 +73,7 @@
-
- # In order to lookup the entries via |error| with binary search,
- # sort outputs here.
-- outputs.sort(lambda x, y: cmp(x[1], y[1]) or cmp(x[0], y[0]))
-+ outputs.sort(key=lambda x: (x[1], x[0]))
-
- serialized_string_array_builder.SerializeToFile(
- [value for (value, _, _) in outputs], output_value_array_path)
---- /src/rewriter/gen_single_kanji_rewriter_data.py
-+++ /src/rewriter/gen_single_kanji_rewriter_data.py
-@@ -52,7 +52,7 @@
- stream = code_generator_util.ParseColumnStream(stream, num_column=2)
- outputs = list(stream)
- # For binary search by |key|, sort outputs here.
-- outputs.sort(lambda x, y: cmp(x[0], y[0]))
-+ outputs.sort(key=lambda x: x[0])
-
- return outputs
-
-@@ -72,7 +72,7 @@
- variant_items.append([target, original, len(variant_types) - 1])
-
- # For binary search by |target|, sort variant items here.
-- variant_items.sort(lambda x, y: cmp(x[0], y[0]))
-+ variant_items.sort(key=lambda x: x[0])
-
- return (variant_types, variant_items)
-
-@@ -151,10 +151,10 @@
- def main():
- options = _ParseOptions()
-
-- with open(options.single_kanji_file, 'r') as single_kanji_stream:
-+ with open(options.single_kanji_file, 'rb') as single_kanji_stream:
- single_kanji = ReadSingleKanji(single_kanji_stream)
-
-- with open(options.variant_file, 'r') as variant_stream:
-+ with open(options.variant_file, 'rb') as variant_stream:
- variant_info = ReadVariant(variant_stream)
-
- WriteSingleKanji(single_kanji,
---- /src/session/gen_session_stress_test_data.py
-+++ /src/session/gen_session_stress_test_data.py
-@@ -50,24 +50,26 @@
- """
- result = ''
- for c in s:
-- hexstr = hex(ord(c))
-+ hexstr = hex(c)
- # because hexstr contains '0x', remove the prefix and add our prefix
- result += '\\x' + hexstr[2:]
- return result
-
- def GenerateHeader(file):
- try:
-- print "const char *kTestSentences[] = {"
-- for line in open(file, "r"):
-- if line.startswith('#'):
-+ print("const char *kTestSentences[] = {")
-+ fh = open(file, "rb")
-+ for line in fh:
-+ if line.startswith(b'#'):
- continue
-- line = line.rstrip('\r\n')
-+ line = line.rstrip(b'\r\n')
- if not line:
- continue
-- print " \"%s\"," % escape_string(line)
-- print "};"
-+ print(" \"%s\"," % escape_string(line))
-+ fh.close()
-+ print("};")
- except:
-- print "cannot open %s" % (file)
-+ print("cannot open %s" % (file))
- sys.exit(1)
-
- def main():
---- /src/unix/ibus/gen_mozc_xml.py
-+++ /src/unix/ibus/gen_mozc_xml.py
-@@ -74,7 +74,7 @@
-
-
- def OutputXmlElement(param_dict, element_name, value):
-- print ' <%s>%s</%s>' % (element_name, (value % param_dict), element_name)
-+ print(' <%s>%s</%s>' % (element_name, (value % param_dict), element_name))
-
-
- def OutputXml(param_dict, component, engine_common, engines, setup_arg):
-@@ -90,26 +90,26 @@
- engines: A dictionary from a property name to a list of property values of
- engines. For example, {'name': ['mozc-jp', 'mozc', 'mozc-dv']}.
- """
-- print '<component>'
-- for key in component:
-+ print('<component>')
-+ for key in sorted(component):
- OutputXmlElement(param_dict, key, component[key])
-- print '<engines>'
-+ print('<engines>')
- for i in range(len(engines['name'])):
-- print '<engine>'
-- for key in engine_common:
-+ print('<engine>')
-+ for key in sorted(engine_common):
- OutputXmlElement(param_dict, key, engine_common[key])
- if setup_arg:
- OutputXmlElement(param_dict, 'setup', ' '.join(setup_arg))
-- for key in engines:
-+ for key in sorted(engines):
- OutputXmlElement(param_dict, key, engines[key][i])
-- print '</engine>'
-- print '</engines>'
-- print '</component>'
-+ print('</engine>')
-+ print('</engines>')
-+ print('</component>')
-
-
- def OutputCppVariable(param_dict, prefix, variable_name, value):
-- print 'const char k%s%s[] = "%s";' % (prefix, variable_name.capitalize(),
-- (value % param_dict))
-+ print('const char k%s%s[] = "%s";' % (prefix, variable_name.capitalize(),
-+ (value % param_dict)))
-
-
- def OutputCpp(param_dict, component, engine_common, engines):
-@@ -122,18 +122,18 @@
- engines: ditto.
- """
- guard_name = 'MOZC_UNIX_IBUS_MAIN_H_'
-- print CPP_HEADER % (guard_name, guard_name)
-- for key in component:
-+ print(CPP_HEADER % (guard_name, guard_name))
-+ for key in sorted(component):
- OutputCppVariable(param_dict, 'Component', key, component[key])
-- for key in engine_common:
-+ for key in sorted(engine_common):
- OutputCppVariable(param_dict, 'Engine', key, engine_common[key])
-- for key in engines:
-- print 'const char* kEngine%sArray[] = {' % key.capitalize()
-+ for key in sorted(engines):
-+ print('const char* kEngine%sArray[] = {' % key.capitalize())
- for i in range(len(engines[key])):
-- print '"%s",' % (engines[key][i] % param_dict)
-- print '};'
-- print 'const size_t kEngineArrayLen = %s;' % len(engines['name'])
-- print CPP_FOOTER % guard_name
-+ print('"%s",' % (engines[key][i] % param_dict))
-+ print('};')
-+ print('const size_t kEngineArrayLen = %s;' % len(engines['name']))
-+ print(CPP_FOOTER % guard_name)
-
-
- def CheckIBusVersion(options, minimum_version):
---- /src/usage_stats/gen_stats_list.py
-+++ /src/usage_stats/gen_stats_list.py
-@@ -37,23 +37,24 @@
-
- def GetStatsNameList(filename):
- stats = []
-- for line in open(filename, 'r'):
-- stat = line.strip()
-- if not stat or stat[0] == '#':
-- continue
-- stats.append(stat)
-+ with open(filename, 'r') as file:
-+ for line in file:
-+ stat = line.strip()
-+ if not stat or stat[0] == '#':
-+ continue
-+ stats.append(stat)
- return stats
-
-
- def main():
- stats_list = GetStatsNameList(sys.argv[1])
-- print '// This header file is generated by gen_stats_list.py'
-+ print('// This header file is generated by gen_stats_list.py')
- for stats in stats_list:
-- print 'const char k%s[] = "%s";' % (stats, stats)
-- print 'const char *kStatsList[] = {'
-+ print('const char k%s[] = "%s";' % (stats, stats))
-+ print('const char *kStatsList[] = {')
- for stats in stats_list:
-- print ' k%s,' % (stats)
-- print '};'
-+ print(' k%s,' % (stats))
-+ print('};')
-
-
- if __name__ == '__main__':
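The migration patterns above recur throughout the deleted patch. Below is a minimal, self-contained Python 3 sketch of the three most common ones; the function names and sample data are illustrative only and are not taken from Mozc.

import re

# 1. Binary-mode streams yield bytes in Python 3, so literals, regex
#    patterns, and re.sub() replacements must all be bytes (the b''
#    and br'' literals the patch introduces).
def remove_trailing_number(token):
    if not token:
        return b''
    return re.sub(br'^([^0-9]+)[0-9]+$', br'\1', token)

# 2. Indexing bytes yields an int in Python 3, so the patch compares a
#    one-byte slice (line[0:1] == b'#') rather than line[0] == '#',
#    and tests ASCII-ness with char < 128 rather than ord(char) < 128.
def is_comment(line):
    return line[0:1] == b'#'

# 3. cmp()-style comparators are gone in Python 3; a key function
#    returning a tuple expresses the same primary/secondary ordering
#    as cmp(x[1], y[1]) or cmp(x[0], y[0]).
outputs = [(b'v2', b'err_b'), (b'v1', b'err_a'), (b'v3', b'err_a')]
outputs.sort(key=lambda x: (x[1], x[0]))

assert remove_trailing_number(b'reading42') == b'reading'
assert not is_comment(b'data\tvalue')
assert [err for (_, err) in outputs] == [b'err_a', b'err_a', b'err_b']

The sorted() wrappers added around dict iteration in gen_mozc_xml.py (for key in sorted(component): ...) presumably pin the emission order of the generated XML and C++ output, since dict iteration order was not guaranteed before Python 3.7 and hash randomization could otherwise reshuffle generated files between builds.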