#!/usr/bin/python
"""Replace words in text using a mapping table, keeping accelerator keys.

The script reads UTF-8 text from standard input, replaces every word found
in a ``needle:replacement`` mapping table and writes the result to standard
output as UTF-8.  Accelerator markers (``KEY``, e.g. the ``_`` in ``_File``)
embedded in a matched word are tolerated while matching and re-inserted
into the replacement.

NOTE(review): the default table name ("ijekavski-tabela") suggests the
table maps Ekavian to Ijekavian word forms -- confirm with the table owner.
"""

import io
import re
import sys

KEY = "_"  # This can be modified for other accelerator keys, like '&' for KDE

# Default location of the mapping table (one "needle:replacement" per line).
DEFAULT_MAP_FILE = '/home/danilo/bin/ijekavski-tabela'

# needle -> replacement table filled by readmap().  The name shadows the
# built-in ``map``; it is kept as is so existing users of this module-level
# name keep working.
map = {}


def construct_re(word):
    r"""Construct a regular expression out of a simple word mapping entry.

    A leading "^" and a trailing "$" in ``word`` are replaced with "\b"
    (word boundary).  An optional accelerator key (``KEY``) is allowed in
    front of every alphanumeric character, and the entire regexp is
    enclosed in parentheses so the match is available as group 1.

    Returns a tuple ``(pattern, start, end)`` where ``start`` / ``end`` are
    1 when the word was anchored with "^" / "$" respectively, else 0.
    """
    start = 0
    end = 0
    # Start from the word itself so unanchored words work too.
    reg = word
    if word.startswith('^'):
        reg = r"\b" + word[1:]
        start = 1
    if word.endswith('$'):
        reg = reg[:-1] + r"\b"
        end = 1

    optional_key = r"(?:%s)?" % re.escape(KEY)
    parts = []
    previous = ""
    for char in reg:
        # A character right after a backslash belongs to an escape sequence
        # (such as the "b" of "\b") and must not get a key in front of it.
        if char.isalnum() and previous != '\\':
            parts.append(optional_key + char)
        else:
            parts.append(char)
        previous = char
    return (r"(%s)" % "".join(parts), start, end)


def construct_replacement(found, mapped):
    """Adapt ``mapped`` to the letter case and accelerator key of ``found``.

    ``found`` is the text that was matched (possibly containing ``KEY``),
    ``mapped`` is the replacement from the table.  If the matched word is
    all upper-case the replacement is upper-cased; if only its first letter
    is upper-case the replacement is capitalized.  If ``found`` contains an
    accelerator key, the key is inserted in the replacement in front of the
    first occurrence of the letter that followed it in ``found``.

    Returns the adapted replacement string.
    """
    # strip accelerator keys from word to detect case
    bare = found.replace(KEY, "")
    if bare.isupper():
        mapped = mapped.upper()
    elif bare[:1].isupper():  # slice instead of index: safe on empty text
        mapped = mapped.capitalize()

    accpos = found.find(KEY)
    if accpos != -1 and len(found) > accpos + 1:
        letter = found[accpos + 1]
        mapped = mapped.replace(letter, "%s%s" % (KEY, letter), 1)
    return mapped


def replace(text):
    """Return ``text`` with every needle of the global ``map`` replaced.

    For each needle, all case-insensitive matches are collected; every
    distinct matched spelling is then replaced (case-sensitively) by a
    replacement adapted to that spelling via construct_replacement().
    """
    for needle in map:
        pattern, at_start, at_end = construct_re(needle)
        inword = re.compile(pattern, re.IGNORECASE | re.UNICODE)
        # Each distinct spelling is substituted globally exactly once.
        # Substituting again for a repeated occurrence would re-expand
        # replacements that themselves still match the needle.
        seen = set()
        for match in inword.finditer(text):
            found = match.group(1)
            if found in seen:
                continue
            seen.add(found)

            # The matched text is literal text, not a pattern.
            nowreg = re.escape(found)
            if at_start:
                nowreg = r"\b" + nowreg
            if at_end:
                nowreg += r"\b"
            now = re.compile(nowreg, re.UNICODE)
            replacement = construct_replacement(found, map[needle])
            # A callable keeps backslashes in the replacement literal
            # instead of having them parsed as a regex template.
            text = now.sub(lambda _match, _repl=replacement: _repl, text)
    return text


def readmap(filename=DEFAULT_MAP_FILE):
    """Load ``needle:replacement`` lines from ``filename`` into ``map``.

    The file is read as UTF-8.  Only the first two ":"-separated fields of
    a line are used.  Blank lines, lines without a ":" separator and lines
    with an empty needle are skipped.
    """
    with io.open(filename, "r", encoding="utf-8") as table:
        for line in table:
            pair = line.strip().split(':')
            if len(pair) < 2 or not pair[0]:
                continue
            map[pair[0]] = pair[1]


def main():
    """Read the table, convert standard input and print the result."""
    readmap()
    # Use the underlying byte streams where they exist (Python 3) so the
    # data is always decoded/encoded as UTF-8 regardless of the locale.
    stdin = getattr(sys.stdin, "buffer", sys.stdin)
    stdout = getattr(sys.stdout, "buffer", sys.stdout)
    text = stdin.read().decode('utf-8')
    stdout.write(replace(text).encode('utf-8') + b"\n")


if __name__ == "__main__":
    main()