# xcp.py: XeTeX Chinese Preprocessor 0.2.2
#
# Reads a UTF-8 LaTeX file and writes it to stdout with every run of CJK
# characters inside the document body wrapped in \zh{...}.
#
# Changelog:
# 0.2.2: prevent unnecessary processing with '@'
#        before '{'
# 0.2.1: minor upgrade to fix comment handling

import codecs
import sys

LINE_END = '\n'
MARK_START = '\\begin{document}'
MARK_END = '\\end{document}'
VERB_START = '\\begin{verbatim}'
VERB_END = '\\end{verbatim}'

# Binary output stream.  Python 3 exposes the byte stream as
# sys.stdout.buffer; on Python 2 sys.stdout itself accepts encoded bytes.
stdout = getattr(sys.stdout, 'buffer', sys.stdout)

# Inclusive code point ranges that isch() treats as Chinese.
_CJK_RANGES = (
    (0x2e80, 0x33ff),    # Punct & Radicals
    (0x3400, 0x4dbf),    # CJK Unified Ideographs Extension A
    (0x4e00, 0x9fbb),    # CJK Unified Ideographs
    (0xf900, 0xfad9),    # CJK Compatibility Ideographs
    (0xff00, 0xffef),    # Fullwidth Latin Characters
    (0x20000, 0x2a6d6),  # CJK Unified Ideographs Extension B
    (0x2f800, 0x2fa1d),  # CJK Compatibility Supplement
)


def output(x):
    """Write the text *x* to stdout encoded as UTF-8."""
    stdout.write(x.encode('utf-8'))


def output_line_end():
    """Write a single line terminator to stdout."""
    output(LINE_END)


def isch(x):
    """Return 1 if code point *x* lies in one of the CJK ranges, else 0."""
    for low, high in _CJK_RANGES:
        if low <= x <= high:
            return 1
    return 0


def convert(lines):
    """Yield the preprocessed text for an iterable of source lines.

    Lines up to and including the one containing ``\\begin{document}`` and
    lines from ``\\end{document}`` onwards are yielded unchanged.  Inside
    the body:

    * each run of characters accepted by ``isch`` is wrapped in ``\\zh{...}``;
    * text after an unescaped ``%`` is copied through as a comment;
    * ``@{...}`` copies the brace group through unprocessed and drops the
      ``@``;
    * the newline of a body line is emitted lazily, before the next line,
      and is preceded by ``%`` when the break falls between two CJK
      characters.

    Fragments are yielded in output order; joining them gives the result.
    """
    in_body = False           # between \begin{document} and \end{document}
    in_verbatim = False       # only suppresses \end{document} detection
    zh_open = False           # a "\zh{" is waiting for its closing brace
    newline_pending = False   # a body line was emitted without its newline
    in_comment = False
    raw = False               # inside an "@{...}" pass-through group
    depth = 0                 # brace depth inside the pass-through group
    last_code = 0             # code point of the previously handled char
    backslash = ord('\\')

    for line in lines:
        if not line:
            continue  # nothing to emit, and line[0] below needs a character

        if not in_verbatim and MARK_END in line:
            if zh_open:
                yield '}'
                # The group is closed now; a stale flag would produce a
                # stray brace if another document body followed.
                zh_open = False
            yield LINE_END
            in_body = False

        if in_body:
            if VERB_START in line:
                in_verbatim = True
            if VERB_END in line:
                in_verbatim = False

            first_code = ord(line[0])
            # Close a group left open by the previous line unless this
            # line continues with more CJK text.
            if zh_open and not isch(first_code):
                zh_open = False
                yield '}'
            # Line break between two CJK characters: comment it out.
            if isch(last_code) and isch(first_code):
                yield '%'
            if newline_pending:
                yield LINE_END
            else:
                newline_pending = True

            if line[0] == LINE_END:
                last_code = ord(LINE_END)

            line_len = len(line)
            for i, c in enumerate(line):
                if c == '%' and last_code != backslash:
                    in_comment = True
                if c == LINE_END:
                    in_comment = False
                    break
                code = ord(c)

                if in_comment:
                    if zh_open:
                        # Close the group before the comment text; a brace
                        # written later would sit inside the comment and
                        # never reach TeX.
                        yield '}'
                        zh_open = False
                    yield c
                    last_code = code
                    continue

                if c == '@' and i + 1 < line_len and line[i + 1] == '{':
                    raw = True
                    depth = 0
                    continue

                if raw:
                    if c == '{':
                        depth += 1
                    elif c == '}':
                        depth -= 1
                    if depth == 0:
                        # The final brace goes through the normal path.
                        raw = False
                if raw:
                    yield c
                    last_code = code
                    continue

                is_cjk = isch(code)
                if is_cjk and not zh_open:
                    yield '\\zh{'
                    zh_open = True
                elif zh_open and not is_cjk:
                    yield '}'
                    zh_open = False
                last_code = code
                yield c
        else:
            yield line

        if MARK_START in line:
            in_body = True


def main(argv=None):
    """Preprocess the file named in ``argv[1]`` onto stdout.

    Returns 0 on success, 1 if the file cannot be opened and 2 if no file
    name was given.  Diagnostics go to stderr so they never mix with the
    generated TeX.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: xcp.py FILE\n")
        return 2
    try:
        f = codecs.open(argv[1], encoding='utf-8')
    except IOError as err:
        sys.stderr.write("xcp: cannot open %s: %s\n" % (argv[1], err))
        return 1
    try:
        for piece in convert(f):
            output(piece)
    finally:
        f.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())