# gsm7.py -- GSM 7-bit alphabet encode/decode and septet packing helpers.
# (Path in the originating repository: Import/projects/LGN-IP3870/t/new/gsm7.py)
from utils import print_hex

# GSM 7-bit default alphabet: index = septet value (0..127), entry = Unicode
# code point.  Index 27 is the escape that selects _xform_esc_table.
# Indices 16..26 are the capital Greek letters; 22 and 23 are capital Pi
# (U+03A0 = 928) and capital Psi (U+03A8 = 936), not their lowercase forms.
_xform_table = ( 64, 163, 36, 165, 232, 233, 249, 236, 242, 199, 10, 216, 248, 13, 197, 229, 916, 95, 934, 915,
    923, 937, 928, 936, 931, 920, 926, 27, 198, 230, 223, 201, 32, 33, 34, 35, 164, 37, 38, 39, 40, 41, 42, 43, 44,
    45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 161, 65, 66, 67, 68, 69, 70, 71, 72,
    73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 196, 214, 209, 220, 167, 191, 97, 98, 99,
    100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
    228, 246, 241, 252, 224)

# Extension table: septet that follows the escape (27) -> Unicode code point
# (form feed, ^, {, }, \, [, ~, ], | and the euro sign).
_xform_esc_table = { 0x0A: 12, 0x14: 94, 0x28: 123, 0x29: 125, 0x2F:  92,
    0x3C: 91, 0x3D : 126, 0x3E : 93, 0x40 : 124, 0x65 : 8364}

# Reverse lookup: Unicode code point -> septet value.  Characters from the
# extension table are stored as (septet + 128) so that callers can tell they
# need an escape prefix.
_xform_table_rev = {}
for i, x in enumerate(_xform_table):
	_xform_table_rev[x] = i

# items() rather than iteritems(): same result, and it exists on every
# Python version.
for k, v in _xform_esc_table.items():
	_xform_table_rev[v] = k + 128

# text: utf8
# output: gsm7bit alphabet; 7 bit alphabet with escape
def gsm7bit_encode(text):
	"""Convert a UTF-8 byte string to unpacked GSM 7-bit septets.

	Returns a string with one character per septet.  Characters found in
	the extension table are emitted as the escape (27) followed by their
	extension code.  Characters with no GSM mapping are reported on stdout
	and left out of the result.
	"""
	septets = []
	for ch in unicode(text, 'utf-8'):
		cp = ord(ch)
		v = _xform_table_rev.get(cp)
		if v is None:
			print('not in extended gsm alphabet %04X' % (cp))
			continue
		if v < 128:
			septets.append(chr(v))
		else:
			# reverse-table values >= 128 mark extension-table characters
			septets.append(chr(27) + chr(v - 128))
	return ''.join(septets)

def ucs2_encode_for_sim(ucs2):
	"""Convert a unicode string to unpacked GSM 7-bit septets, leniently.

	Works like the strict encoder, except that a character without a GSM
	mapping is passed through unchanged when its code point is below 256
	and silently dropped otherwise.  Extension-table characters are emitted
	as the escape (27) followed by their extension code.
	"""
	pieces = []
	for ch in ucs2:
		cp = ord(ch)
		v = _xform_table_rev.get(cp)
		if v is None:
			if cp < 256:
				pieces.append(chr(cp))
		elif v < 128:
			pieces.append(chr(v))
		else:
			# reverse-table values >= 128 mark extension-table characters
			pieces.append(chr(27) + chr(v - 128))
	return ''.join(pieces)

def gsm7_ascii(ucs2):
	"""Convert a unicode string to unpacked GSM 7-bit septets, all or nothing.

	Returns the septet string (extension characters as escape 27 + code),
	or '' as soon as any character has no GSM mapping.
	"""
	pieces = []
	for ch in ucs2:
		v = _xform_table_rev.get(ord(ch))
		if v is None:
			return ''
		if v < 128:
			pieces.append(chr(v))
		else:
			# reverse-table values >= 128 mark extension-table characters
			pieces.append(chr(27) + chr(v - 128))
	return ''.join(pieces)



# text: gsm7bit alphabet: 7 bit alphabet with escape
# output: utf8
def gsm7bit_decode(text):
	"""Convert unpacked GSM 7-bit septets (one per character) to UTF-8.

	A septet of 27 is the escape: the following septet is looked up in the
	extension table.  When that septet has no extension entry, the
	character from the main table is used instead (3GPP TS 23.038), and a
	second escape is rendered as a space.  Values of 128 and above are not
	valid septets and are rendered as a space.  An escape at the very end
	of the input produces nothing.
	"""
	esc = False
	pieces = []
	for ch in text:
		t = ord(ch)
		if esc:
			esc = False
			cp = _xform_esc_table.get(t)
			if cp is None:
				if t == 27 or t >= 128:
					# reserved second escape / not a septet: show a space
					cp = 32
				else:
					# unknown extension code: fall back to the main table
					cp = _xform_table[t]
			pieces.append(unichr(cp))
		elif t == 27:
			esc = True
		elif t < 128:
			pieces.append(unichr(_xform_table[t]))
		else:
			pieces.append(u' ')
	return u''.join(pieces).encode('utf-8')


# 7bit -> 8 bit
def gsm7_pack(data, bit_pos):
	"""Pack septets (one per character of data) into a string of octets.

	bit_pos is the bit offset inside the first output octet at which the
	first septet starts; septets are laid down least-significant bit
	first.  A partially filled last octet is emitted as well.
	"""
	octets = []
	acc = 0
	for ch in data:
		septet = ord(ch)
		if bit_pos > 1:
			# The septet does not fit: finish the current octet with its low
			# bits and carry the remaining high bits into the next one.
			octets.append(chr(acc | ((septet << bit_pos) & 0xFF)))
			bit_pos -= 1
			acc = septet >> (7 - bit_pos)
			continue
		# The whole septet fits into the current octet.
		acc |= septet << bit_pos
		bit_pos += 7
		if bit_pos >= 8:
			octets.append(chr(acc))
			bit_pos -= 8
			acc = 0
	if bit_pos:
		octets.append(chr(acc))
	return ''.join(octets)


class BitSource:
	"""Reads consecutive 7-bit values out of a string of octets.

	data    -- the octet string
	di      -- index of the next octet of data to load
	v       -- bits already loaded but not yet returned (low bits first)
	bit_pos -- how many bits of v are still unread
	"""
	def __init__(self, data, bit_pos):
		# Skip the first bit_pos bits of the first octet.
		self.data = data
		self.v = ord(data[0]) >> bit_pos
		self.di = 1
		self.bit_pos = 8 - bit_pos

	def get_7bit(self):
		"""Return the next septet, loading one more octet when needed."""
		have = self.bit_pos
		if have < 7:
			# Combine the leftover low bits with the low bits of a new octet.
			low = self.v
			fresh = ord(self.data[self.di])
			self.di += 1
			self.v = fresh >> (7 - have)
			self.bit_pos = have + 1
			return low | ((fresh << have) & 0x7F)
		septet = self.v & 0x7F
		self.v >>= 7
		self.bit_pos = have - 7
		return septet

# 8 bit -> 7 bit
def gsm7_unpack(src, bit_pos, dst_len):
	"""Unpack dst_len septets from the octet string src.

	bit_pos is the bit offset inside the first octet at which the first
	septet starts.  Returns a string with one character per septet.
	Asking for zero (or fewer) septets returns '' without touching src, so
	an empty src is accepted in that case.
	"""
	if dst_len <= 0:
		# BitSource reads the first octet eagerly; do not build it for nothing.
		return ''
	bits = BitSource(src, bit_pos)
	return ''.join([chr(bits.get_7bit()) for _ in range(dst_len)])


def test():
	# 27, 101
	euro = 'eioeeoiew xie@' * 10
	euro_e = gsm7bit_encode(euro)

	euro8 = gsm7_pack(euro_e, 2)
	x = gsm7_unpack(euro8, 2, len(euro))
	assert x == euro_e

	y  = gsm7bit_decode(x)
	assert y == euro

	x = "\xEF\xB5\xAE\x05\x05"
	y = gsm7_unpack(x, 0, 5)
	print gsm7bit_decode(y)

	x = "\x20"
	y = gsm7_unpack(x, 0, 1)
	print ord(y[0])
	x = gsm7bit_decode(y)
	print ord(x)
def test():
	x = '\xc3\x84L\xc3\x8b\xc3\x96G'
	y = gsm7bit_encode(x)
	z = gsm7_pack(y, 0)
	print len(y), y
	print len(z), z
	a = gsm7_unpack(z, 0, len(y))
	b = gsm7bit_decode(a)
	print repr(b)

def test():
	x = '\x90\x88\x64\x42\xA9\x58\x2E\x98\x8C\x86\xD3\xF1\x7C\x00'
	a = gsm7_unpack(x, 0, 15)
	b = gsm7bit_decode(a)
	x = unicode(b, 'utf-8')
	print 'utf- 8'
	print_hex(b)
	print 'len', len(x), len(b)

def test():
	x = '\x81\x07\x01\x7F\x7B\xE1\xE2\xE3\x04\xEB'
	a = gsm7_unpack(x, 0, 7)
	b = gsm7bit_decode(a)
	x = unicode(b, 'utf-8')
	print 'utf- 8'
	print_hex(b)
	print 'len', len(x), len(b)
	print_hex(x.encode('iso8859-1'))
	print 'iso8859-1', x.encode('iso8859-1')

# Run the self-check only when executed as a script, not when imported.
if __name__ == '__main__':
	test()
# end