https://github.com/google/mozc/issues/462 --- /src/dictionary/gen_pos_map.py +++ /src/dictionary/gen_pos_map.py @@ -39,7 +39,7 @@ from build_tools import code_generator_util -HEADER = """// Copyright 2009 Google Inc. All Rights Reserved. +HEADER = b"""// Copyright 2009 Google Inc. All Rights Reserved. // Author: keni #ifndef MOZC_DICTIONARY_POS_MAP_H_ @@ -48,13 +48,13 @@ // POS conversion rules const POSMap kPOSMap[] = { """ -FOOTER = """}; +FOOTER = b"""}; #endif // MOZC_DICTIONARY_POS_MAP_H_ """ def ParseUserPos(user_pos_file): - with open(user_pos_file, 'r') as stream: + with open(user_pos_file, 'rb') as stream: stream = code_generator_util.SkipLineComment(stream) stream = code_generator_util.ParseColumnStream(stream, num_column=2) return dict((key, enum_value) for key, enum_value in stream) @@ -64,7 +64,7 @@ user_pos_map = ParseUserPos(user_pos_file) result = {} - with open(third_party_pos_map_file, 'r') as stream: + with open(third_party_pos_map_file, 'rb') as stream: stream = code_generator_util.SkipLineComment(stream) for columns in code_generator_util.ParseColumnStream(stream, num_column=2): third_party_pos_name, mozc_pos = (columns + [None])[:2] @@ -78,7 +78,7 @@ result[third_party_pos_name] = mozc_pos # Create mozc_pos to mozc_pos map. - for key, value in user_pos_map.iteritems(): + for key, value in user_pos_map.items(): if key in result: assert (result[key] == value) continue @@ -94,10 +94,10 @@ if value is None: # Invalid PosType. value = ( - 'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)') + b'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)') else: - value = '::mozc::user_dictionary::UserDictionary::' + value - output.write(' { %s, %s },\n' % (key, value)) + value = b'::mozc::user_dictionary::UserDictionary::' + value + output.write(b' { %s, %s },\n' % (key, value)) output.write(FOOTER) @@ -121,7 +121,7 @@ pos_map = GeneratePosMap(options.third_party_pos_map_file, options.user_pos_file) - with open(options.output, 'w') as stream: + with open(options.output, 'wb') as stream: OutputPosMap(pos_map, stream) --- /src/dictionary/gen_pos_rewrite_rule.py +++ /src/dictionary/gen_pos_rewrite_rule.py @@ -46,29 +46,34 @@ def LoadRewriteMapRule(filename): - fh = open(filename) + fh = open(filename, 'rb') rule = [] for line in fh: - line = line.rstrip('\n') - if not line or line.startswith('#'): + line = line.rstrip(b'\n') + if not line or line.startswith(b'#'): continue fields = line.split() rule.append([fields[0], fields[1]]) + fh.close() return rule def ReadPOSID(id_file, special_pos_file): pos_list = [] - for line in open(id_file, 'r'): + fh = open(id_file, 'rb') + for line in fh: fields = line.split() pos_list.append(fields[1]) + fh.close() - for line in open(special_pos_file, 'r'): - if len(line) <= 1 or line[0] == '#': + fh = open(special_pos_file, 'rb') + for line in fh: + if len(line) <= 1 or line[0:1] == b'#': continue fields = line.split() pos_list.append(fields[0]) + fh.close() return pos_list @@ -112,7 +117,7 @@ ids.append(id) with open(opts.output, 'wb') as f: - f.write(''.join(chr(id) for id in ids)) + f.write(''.join(chr(id) for id in ids).encode('utf-8')) if __name__ == '__main__': --- /src/dictionary/gen_suffix_data.py +++ /src/dictionary/gen_suffix_data.py @@ -52,10 +52,10 @@ opts = _ParseOptions() result = [] - with open(opts.input, 'r') as stream: + with open(opts.input, 'rb') as stream: for line in stream: - line = line.rstrip('\r\n') - fields = line.split('\t') + line = line.rstrip(b'\r\n') + fields = line.split(b'\t') key = fields[0] lid = int(fields[1]) rid = int(fields[2]) @@ -63,7 +63,7 @@ value = fields[4] if key == value: - value = '' + value = b'' result.append((key, value, lid, rid, cost)) --- /src/dictionary/gen_user_pos_data.py +++ /src/dictionary/gen_user_pos_data.py @@ -64,7 +64,7 @@ f.write(struct.pack(' XX町YY and (XX町)ZZ # YY、ZZ # -> YY and ZZ - chou_match = re.match(u'(.*町)?(.*)', level3, re.U) + chou_match = re.match('(.*町)?(.*)', level3, re.U) if chou_match: - chou = u'' + chou = '' if chou_match.group(1): chou = chou_match.group(1) rests = chou_match.group(2) - return [chou + rest for rest in rests.split(u'、')] + return [chou + rest for rest in rests.split('、')] return [level3] def CanParseAddress(address): """Return true for valid address.""" - return (address.find(u'(') == -1 or - address.find(u')') != -1) + return (address.find('(') == -1 or + address.find(')') != -1) def ParseOptions(): --- /src/dictionary/zip_code_util.py +++ /src/dictionary/zip_code_util.py @@ -86,11 +86,11 @@ _SPECIAL_CASES = [ - SpecialMergeZip(u'5900111', u'大阪府', u'堺市中区', [u'三原台']), - SpecialMergeZip(u'8710046', u'大分県', u'中津市', - [u'金谷', u'西堀端', u'東堀端', u'古金谷']), - SpecialMergeZip(u'9218046', u'石川県', u'金沢市', - [u'大桑町', u'三小牛町']), + SpecialMergeZip('5900111', '大阪府', '堺市中区', ['三原台']), + SpecialMergeZip('8710046', '大分県', '中津市', + ['金谷', '西堀端', '東堀端', '古金谷']), + SpecialMergeZip('9218046', '石川県', '金沢市', + ['大桑町', '三小牛町']), ] --- /src/gui/character_pad/data/gen_cp932_map.py +++ /src/gui/character_pad/data/gen_cp932_map.py @@ -32,7 +32,6 @@ import re import sys -import string kUnicodePat = re.compile(r'0x[0-9A-Fa-f]{2,4}') def IsValidUnicode(n): @@ -42,28 +41,29 @@ fh = open(sys.argv[1]) result = {} for line in fh.readlines(): - if line[0] is '#': + if line[0] == '#': continue - array = string.split(line) + array = line.split() sjis = array[0] ucs2 = array[1] if eval(sjis) < 32 or not IsValidUnicode(ucs2): continue result.setdefault(ucs2, sjis) + fh.close() keys = sorted(result.keys()) - print "struct CP932MapData {" - print " unsigned int ucs4;" - print " unsigned short int sjis;" - print "};" - print "" - print "static const size_t kCP932MapDataSize = %d;" % (len(keys)) - print "static const CP932MapData kCP932MapData[] = {" + print("struct CP932MapData {") + print(" unsigned int ucs4;") + print(" unsigned short int sjis;") + print("};") + print("") + print("static const size_t kCP932MapDataSize = %d;" % (len(keys))) + print("static const CP932MapData kCP932MapData[] = {") for n in keys: - print " { %s, %s }," % (n ,result[n]) - print " { 0, 0 }"; - print "};" + print(" { %s, %s }," % (n ,result[n])) + print(" { 0, 0 }"); + print("};") if __name__ == "__main__": main() --- /src/gui/character_pad/data/gen_local_character_map.py +++ /src/gui/character_pad/data/gen_local_character_map.py @@ -30,7 +30,6 @@ __author__ = "taku" -import string import re import sys @@ -43,9 +42,9 @@ fh = open(filename) result = [] for line in fh.readlines(): - if line[0] is '#': + if line[0] == '#': continue - array = string.split(line) + array = line.split() jis = array[0].replace('0x', '') ucs2 = array[1].replace('0x', '') if len(jis) == 2: @@ -53,6 +52,7 @@ if IsValidUnicode(ucs2): result.append([jis, ucs2]) + fh.close() return ["JISX0201", result] @@ -60,13 +60,14 @@ fh = open(filename) result = [] for line in fh.readlines(): - if line[0] is '#': + if line[0] == '#': continue array = line.split() jis = array[1].replace('0x', '') ucs2 = array[2].replace('0x', '') if IsValidUnicode(ucs2): result.append([jis, ucs2]) + fh.close() return ["JISX0208", result] @@ -74,13 +75,14 @@ fh = open(filename) result = [] for line in fh.readlines(): - if line[0] is '#': + if line[0] == '#': continue array = line.split() jis = array[0].replace('0x', '') ucs2 = array[1].replace('0x', '') if IsValidUnicode(ucs2): result.append([jis, ucs2]) + fh.close() return ["JISX0212", result] @@ -88,7 +90,7 @@ fh = open(filename) result = [] for line in fh.readlines(): - if line[0] is '#': + if line[0] == '#': continue array = line.split() sjis = array[0].replace('0x', '') @@ -100,19 +102,20 @@ if IsValidUnicode(ucs2): result.append([sjis, ucs2]) + fh.close() return ["CP932", result] def Output(arg): name = arg[0] result = arg[1] - print "static const size_t k%sMapSize = %d;" % (name, len(result)) - print "static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name) + print("static const size_t k%sMapSize = %d;" % (name, len(result))) + print("static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name)) for n in result: - print " { 0x%s, 0x%s }," % (n[0] ,n[1]) - print " { 0, 0 }"; - print "};" - print "" + print(" { 0x%s, 0x%s }," % (n[0] ,n[1])) + print(" { 0, 0 }"); + print("};") + print("") if __name__ == "__main__": Output(LoadJISX0201(sys.argv[1])) --- /src/gui/character_pad/data/gen_unicode_blocks.py +++ /src/gui/character_pad/data/gen_unicode_blocks.py @@ -33,13 +33,13 @@ import sys import re -re = re.compile('^(.....?)\.\.(.....?); (.+)') +re = re.compile(r'^(.....?)\.\.(.....?); (.+)') def main(): - print "static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {" + print("static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {") fh = open(sys.argv[1]) for line in fh.readlines(): - if line[0] is '#': + if line[0] == '#': continue m = re.match(line) if m is not None: @@ -47,11 +47,12 @@ end = int(m.group(2), 16) name = m.group(3) if start <= 0x2FFFF and end <= 0x2FFFF: - print " { \"%s\", { %d, %d } }," % (name, start, end) + print(" { \"%s\", { %d, %d } }," % (name, start, end)) + fh.close() - print " { NULL, { 0, 0 } }" - print "};" - print "" + print(" { NULL, { 0, 0 } }") + print("};") + print("") if __name__ == "__main__": main() --- /src/gui/character_pad/data/gen_unicode_data.py +++ /src/gui/character_pad/data/gen_unicode_data.py @@ -46,18 +46,19 @@ code = int(code, 16) if code < 0x2FFFF: results.append(" { %d, \"%s\" }," % (code, desc)) + fh.close() - print "struct UnicodeData {"; - print " char32 ucs4;"; - print " const char *description;"; - print "};"; - print "" - print "static const size_t kUnicodeDataSize = %d;" % (len(results)) - print "static const UnicodeData kUnicodeData[] = {"; + print("struct UnicodeData {"); + print(" char32 ucs4;"); + print(" const char *description;"); + print("};"); + print("") + print("static const size_t kUnicodeDataSize = %d;" % (len(results))) + print("static const UnicodeData kUnicodeData[] = {"); for line in results: - print line; - print " { 0, NULL }"; - print "};"; + print(line); + print(" { 0, NULL }"); + print("};"); if __name__ == "__main__": main() --- /src/gui/character_pad/data/gen_unihan_data.py +++ /src/gui/character_pad/data/gen_unihan_data.py @@ -31,35 +31,34 @@ __author__ = "taku" import re -import string import sys rs = {} def Escape(n): - if n is not "NULL": + if n != "NULL": return "\"%s\"" % (n) else: return "NULL" def GetCode(n): - if n is not "NULL": - n = string.replace(n, '0-', 'JIS X 0208: 0x') - n = string.replace(n, '1-', 'JIS X 0212: 0x') - n = string.replace(n, '3-', 'JIS X 0213: 0x') - n = string.replace(n, '4-', 'JIS X 0213: 0x') - n = string.replace(n, 'A-', 'Vendors Ideographs: 0x') - n = string.replace(n, '3A', 'JIS X 0213 2000: 0x') + if n != "NULL": + n = n.replace('0-', 'JIS X 0208: 0x') + n = n.replace('1-', 'JIS X 0212: 0x') + n = n.replace('3-', 'JIS X 0213: 0x') + n = n.replace('4-', 'JIS X 0213: 0x') + n = n.replace('A-', 'Vendors Ideographs: 0x') + n = n.replace('3A', 'JIS X 0213 2000: 0x') return "\"%s\"" % n else: return "NULL" def GetRadical(n): pat = re.compile(r'^(\d+)\.') - if n is not "NULL": + if n != "NULL": m = pat.match(n) if m: result = rs[m.group(1)] - return "\"%s\"" % (result.encode('string_escape')) + return "\"%s\"" % result else: return "NULL" else: @@ -73,6 +72,7 @@ id = array[1] radical = array[2] rs[id] = radical + fh.close() dic = {} pat = re.compile(r'^U\+(\S+)\s+(kTotalStrokes|kJapaneseKun|kJapaneseOn|kRSUnicode|kIRG_JSource)\t(.+)') @@ -86,23 +86,24 @@ n = int(m.group(1), 16) if n <= 65536: dic.setdefault(key, {}).setdefault(field, value) + fh.close() keys = sorted(dic.keys()) - print "struct UnihanData {"; - print " unsigned int ucs4;"; + print("struct UnihanData {"); + print(" unsigned int ucs4;"); # Since the total strokes defined in Unihan data is Chinese-based # number, we can't use it. # print " unsigned char total_strokes;"; - print " const char *japanese_kun;"; - print " const char *japanese_on;"; + print(" const char *japanese_kun;"); + print(" const char *japanese_on;"); # Since the radical information defined in Unihan data is Chinese-based # number, we can't use it. # print " const char *radical;"; - print " const char *IRG_jsource;"; - print "};" - print "static const size_t kUnihanDataSize = %d;" % (len(keys)) - print "static const UnihanData kUnihanData[] = {" + print(" const char *IRG_jsource;"); + print("};") + print("static const size_t kUnihanDataSize = %d;" % (len(keys))) + print("static const UnihanData kUnihanData[] = {") for key in keys: total_strokes = dic[key].get("kTotalStrokes", "0") @@ -111,9 +112,9 @@ rad = GetRadical(dic[key].get("kRSUnicode", "NULL")) code = GetCode(dic[key].get("kIRG_JSource", "NULL")) # print " { 0x%s, %s, %s, %s, %s, %s }," % (key, total_strokes, kun, on, rad, code) - print " { 0x%s, %s, %s, %s }," % (key, kun, on, code) + print(" { 0x%s, %s, %s, %s }," % (key, kun, on, code)) - print "};" + print("};") if __name__ == "__main__": main()