from utils import print_hex
# GSM 7-bit default alphabet: the tuple index is the septet value (0..127),
# the entry is the Unicode code point that septet stands for.
_xform_table = (
    64, 163, 36, 165, 232, 233, 249, 236, 242, 199, 10, 216, 248, 13, 197, 229,
    916, 95, 934, 915, 923, 937, 960, 968, 931, 920, 926, 27, 198, 230, 223, 201,
    32, 33, 34, 35, 164, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    161, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 196, 214, 209, 220, 167,
    191, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 228, 246, 241, 252, 224,
)

# Extension table: septet that follows the escape septet (27) -> Unicode
# code point.
_xform_esc_table = {
    0x0A: 12,
    0x14: 94,
    0x28: 123,
    0x29: 125,
    0x2F: 92,
    0x3C: 91,
    0x3D: 126,
    0x3E: 93,
    0x40: 124,
    0x65: 8364,
}

# Reverse lookup: Unicode code point -> septet value.  Characters that live
# in the extension table are stored as (septet + 128) so that a value >= 128
# tells the encoders to emit the escape septet first.
_xform_table_rev = dict(
    (code_point, septet) for septet, code_point in enumerate(_xform_table)
)
_xform_table_rev.update(
    (code_point, septet + 128)
    for septet, code_point in _xform_esc_table.items()
)
# text: utf8
# output: gsm7bit alphabet; 7 bit alphabet with escape
def gsm7bit_encode(text):
    """Encode UTF-8 text into GSM 7-bit alphabet septets (unpacked).

    text: UTF-8 encoded byte string.  An already-decoded unicode string is
        accepted as well and used as-is.

    Returns a string holding one character per septet.  A character found
    in the extension table is emitted as two septets: the escape septet
    (27) followed by its extension code.  A character with no entry in
    either table is reported on stdout and skipped.
    """
    text_type = type(u'')
    if isinstance(text, text_type):
        ucs2 = text
    else:
        ucs2 = text_type(text, 'utf-8')

    out = []
    for ch in ucs2:
        code_point = ord(ch)
        v = _xform_table_rev.get(code_point)
        if v is None:
            print('not in extended gsm alphabet %04X' % code_point)
            continue
        if v < 128:
            out.append(chr(v))
        else:
            # Values >= 128 mark extension-table entries: escape + code.
            out.append(chr(27) + chr(v - 128))
    return ''.join(out)
def ucs2_encode_for_sim(ucs2):
    """Encode a unicode string to GSM septets, passing unmapped 8-bit chars through.

    ucs2: iterable of single characters (a unicode string).

    Returns a string with one character per output code:
      * a character in the GSM tables becomes its septet, or the escape
        septet (27) plus extension code for extension-table characters;
      * a character missing from the tables whose code point is below 256
        is emitted unchanged as a single raw byte;
      * any other character is silently dropped.
    """
    out = []
    for ch in ucs2:
        code_point = ord(ch)
        v = _xform_table_rev.get(code_point)
        if v is None:
            if code_point < 256:
                out.append(chr(code_point))
            continue
        if v < 128:
            out.append(chr(v))
        else:
            # Values >= 128 mark extension-table entries: escape + code.
            out.append(chr(27) + chr(v - 128))
    return ''.join(out)
def gsm7_ascii(ucs2):
    """Encode a unicode string to GSM septets, or return '' if that is impossible.

    ucs2: iterable of single characters (a unicode string).

    Returns a string with one character per septet (escape septet 27 plus
    extension code for extension-table characters).  If any character has
    no entry in the GSM tables the whole result is the empty string, so an
    empty return for non-empty input means "not representable".
    """
    out = []
    for ch in ucs2:
        v = _xform_table_rev.get(ord(ch))
        if v is None:
            return ''
        if v < 128:
            out.append(chr(v))
        else:
            # Values >= 128 mark extension-table entries: escape + code.
            out.append(chr(27) + chr(v - 128))
    return ''.join(out)
# text: gsm7bit alphabet: 7 bit alphabet with escape
# output: utf8
def gsm7bit_decode(text):
    """Decode unpacked GSM 7-bit septets into a UTF-8 encoded string.

    text: string with one character per septet.

    Decoding rules:
      * septet 27 is the escape: the next septet is looked up in the
        extension table;
      * an escaped septet that has no extension-table entry falls back to
        the main-table character, as 3GPP TS 23.038 requires of receivers
        (previously such characters were dropped);
      * escape followed by escape yields a space (reserved in the spec);
      * any value >= 128 is not a septet and is rendered as a space;
      * an escape that is the very last septet produces nothing.

    Returns the decoded text encoded as UTF-8.
    """
    esc = False
    out = []
    for ch in text:
        t = ord(ch)
        if esc:
            esc = False
            u = _xform_esc_table.get(t)
            if u is not None:
                out.append(unichr(u))
            elif t < 128 and t != 27:
                # Unknown extension code: show the main-table character.
                out.append(unichr(_xform_table[t]))
            else:
                out.append(u' ')
        elif t == 27:
            esc = True
        elif t < 128:
            out.append(unichr(_xform_table[t]))
        else:
            out.append(u' ')
    return u''.join(out).encode('utf-8')
# 7bit -> 8 bit
def gsm7_pack(data, bit_pos):
    """Pack 7-bit values into a byte string, least-significant bits first.

    data: string with one character per septet (each ord() below 128).
    bit_pos: bit offset (0..7, not validated) inside the first output byte
        at which the first septet starts; lower bits are left as zero.

    Returns the packed bytes as a string.  A partially filled last byte is
    emitted with its unused high bits set to zero.
    """
    packed = []
    acc = 0  # bits collected so far for the byte currently being built
    for ch in data:
        septet = ord(ch)
        if bit_pos <= 1:
            # The whole septet fits into the current byte.
            acc |= septet << bit_pos
            bit_pos += 7
            if bit_pos >= 8:
                packed.append(chr(acc))
                bit_pos -= 8
                acc = 0
        else:
            # The septet straddles a byte boundary: the low (8 - bit_pos)
            # bits complete the current byte, the rest start the next one.
            packed.append(chr(acc | ((septet << bit_pos) & 0xFF)))
            acc = septet >> (8 - bit_pos)
            bit_pos -= 1
    if bit_pos:
        packed.append(chr(acc))
    return ''.join(packed)
class BitSource(object):
    """Sequential reader of 7-bit values from a packed byte string.

    Bits are consumed least-significant first within each byte.

    Attributes:
        data: the packed input string.
        v: bits of the current byte that have not been consumed yet
            (already shifted down to bit 0).
        di: index of the next byte of ``data`` to load.
        bit_pos: number of unconsumed bits held in ``v``.
    """

    def __init__(self, data, bit_pos):
        """Start reading ``data`` at bit offset ``bit_pos`` of its first byte.

        Empty ``data`` is accepted; the IndexError is then raised by the
        first get_7bit() call instead of by the constructor.
        """
        self.data = data
        if data:
            self.v = ord(data[0]) >> bit_pos
            self.di = 1
            self.bit_pos = 8 - bit_pos
        else:
            # Nothing buffered: the first read will try to load data[0].
            self.v = 0
            self.di = 0
            self.bit_pos = 0

    def get_7bit(self):
        """Return the next 7-bit value.

        Raises IndexError when another input byte is needed but ``data``
        is exhausted.
        """
        if self.bit_pos >= 7:
            # Enough bits are already buffered.
            self.bit_pos -= 7
            septet = self.v & 0x7F
            self.v >>= 7
            return septet

        # Use the buffered bits as the low part and take the missing
        # (7 - bit_pos) bits from the next byte.
        low_bits = self.v
        self.v = ord(self.data[self.di])
        self.di += 1
        septet = low_bits | ((self.v << self.bit_pos) & 0x7F)
        self.v >>= 7 - self.bit_pos
        self.bit_pos += 1
        return septet
# 8 bit -> 7 bit
def gsm7_unpack(src, bit_pos, dst_len):
    """Unpack ``dst_len`` 7-bit values from a packed byte string.

    src: packed bytes as a string.
    bit_pos: bit offset inside the first byte at which the first septet
        starts.
    dst_len: number of septets to extract.

    Returns a string with one character per septet.  When no septets are
    requested the result is '' even for empty ``src`` (an empty message).
    Raises IndexError if ``src`` is too short for ``dst_len`` septets.
    """
    if dst_len <= 0:
        return ''
    reader = BitSource(src, bit_pos)
    return ''.join([chr(reader.get_7bit()) for _ in range(dst_len)])
def test():
    """Round-trip a message through encode/pack/unpack/decode, then print samples.

    NOTE: later ``def test()`` statements in this file rebind the name, so
    this definition is not the one the ``__main__`` guard ends up calling.
    """
    # 27, 101 -- presumably the escape + 0x65 septet pair of the euro sign;
    # the message below does not actually contain it.
    message = 'eioeeoiew xie@' * 10
    septets = gsm7bit_encode(message)
    packed = gsm7_pack(septets, 2)
    unpacked = gsm7_unpack(packed, 2, len(message))
    assert unpacked == septets
    decoded = gsm7bit_decode(unpacked)
    assert decoded == message

    sample = gsm7_unpack("\xEF\xB5\xAE\x05\x05", 0, 5)
    print(gsm7bit_decode(sample))

    single = gsm7_unpack("\x20", 0, 1)
    print(ord(single[0]))
    print(ord(gsm7bit_decode(single)))
def test():
    """Encode, pack, unpack and decode a UTF-8 sample, printing each stage.

    NOTE: later ``def test()`` statements in this file rebind the name, so
    this definition is not the one the ``__main__`` guard ends up calling.
    """
    utf8_text = '\xc3\x84L\xc3\x8b\xc3\x96G'
    septets = gsm7bit_encode(utf8_text)
    packed = gsm7_pack(septets, 0)
    # Each line shows the length, a space, then the raw string.
    print('%d %s' % (len(septets), septets))
    print('%d %s' % (len(packed), packed))
    unpacked = gsm7_unpack(packed, 0, len(septets))
    print(repr(gsm7bit_decode(unpacked)))
def test():
    """Unpack and decode a fixed 14-byte sample, then dump it and its lengths.

    NOTE: a later ``def test()`` statement in this file rebinds the name, so
    this definition is not the one the ``__main__`` guard ends up calling.
    """
    packed = '\x90\x88\x64\x42\xA9\x58\x2E\x98\x8C\x86\xD3\xF1\x7C\x00'
    utf8 = gsm7bit_decode(gsm7_unpack(packed, 0, 15))
    decoded = utf8.decode('utf-8')
    print('utf- 8')
    print_hex(utf8)
    # Character count first, UTF-8 byte count second.
    print(' '.join(['len', str(len(decoded)), str(len(utf8))]))
def test():
    """Unpack and decode a fixed sample; dump it as UTF-8 and as ISO 8859-1.

    This is the last ``test`` definition in the visible source, so it is the
    one bound to the name when the ``__main__`` guard runs.
    """
    packed = '\x81\x07\x01\x7F\x7B\xE1\xE2\xE3\x04\xEB'
    utf8 = gsm7bit_decode(gsm7_unpack(packed, 0, 7))
    decoded = utf8.decode('utf-8')
    print('utf- 8')
    print_hex(utf8)
    # Character count first, UTF-8 byte count second.
    print(' '.join(['len', str(len(decoded)), str(len(utf8))]))
    latin1 = decoded.encode('iso8859-1')
    print_hex(latin1)
    print(' '.join(['iso8859-1', latin1]))
# Script entry point: when the file is executed directly, run test().
# Several functions named test are defined above; the name refers to the
# last one defined.
if __name__ == '__main__':
    test()
# end