#!/usr/bin/python
import xml.dom.minidom
import zipfile
import re
# Dump every worksheet of an .xlsx workbook to stdout as comma-separated rows.
#
# An .xlsx file is a zip archive of XML parts.  Cell text is usually stored
# indirectly: a cell of type "s" holds an index into xl/sharedStrings.xml.

# Splits an A1-style cell reference into its column letters and row number.
CELL_REF_RE = re.compile(r'^([A-Za-z]+)([0-9]+)$')

# Cell types (SpreadsheetML ST_CellType) whose value is the literal text of
# the <v> child: number, boolean, ISO date, error and formula string.
RAW_VALUE_TYPES = frozenset(['n', 'b', 'd', 'e', 'str'])

# DOM node types that carry character data.
TEXT_NODE_TYPES = (xml.dom.Node.TEXT_NODE, xml.dom.Node.CDATA_SECTION_NODE)


def column_index(letters):
    """Return the zero-based column index for column letters.

    "A" -> 0, "Z" -> 25, "AA" -> 26, and so on (bijective base 26).
    """
    index = 0
    for letter in letters.upper():
        index = index * 26 + (ord(letter) - ord('A') + 1)
    return index - 1


def parse_cell_ref(ref):
    """Split a cell reference such as "AB12" into (column index, row number).

    The column index is zero-based and the row number is one-based, which
    matches how the rows are rendered by format_rows().

    Raises ValueError if *ref* is not of the form <letters><digits>.
    """
    match = CELL_REF_RE.match(ref)
    if match is None:
        raise ValueError("Invalid cell reference: %r" % (ref,))
    return column_index(match.group(1)), int(match.group(2))


def direct_text(node):
    """Return the character data held directly by *node* ("" if none)."""
    return "".join(child.data for child in node.childNodes
                   if child.nodeType in TEXT_NODE_TYPES)


def node_text(node):
    """Return the concatenated text of all <t> elements below *node*.

    Rich-text strings are split over several <r><t>...</t></r> runs, so all
    of them are joined.  <t> elements inside <rPh> (phonetic hints) are
    skipped because they are not part of the displayed text.
    """
    return "".join(direct_text(text_node)
                   for text_node in node.getElementsByTagName("t")
                   if text_node.parentNode.nodeName != "rPh")


def load_shared_strings(archive):
    """Return the workbook's shared strings as a list indexed by string id.

    Workbooks without any string cell have no xl/sharedStrings.xml part; an
    empty list is returned in that case.
    """
    part = 'xl/sharedStrings.xml'
    if part not in archive.namelist():
        return []
    table = xml.dom.minidom.parseString(archive.read(part))
    return [node_text(item) for item in table.getElementsByTagName("si")]


def sheet_entries(archive):
    """Return a list of (sheet name, archive path) in workbook order.

    The part holding a sheet is resolved through the relationship id in
    xl/_rels/workbook.xml.rels.  If that cannot be resolved, the historical
    guess xl/worksheets/sheet<sheetId>.xml is used instead.
    """
    names = set(archive.namelist())

    targets = {}
    rels_part = 'xl/_rels/workbook.xml.rels'
    if rels_part in names:
        rels = xml.dom.minidom.parseString(archive.read(rels_part))
        for rel in rels.getElementsByTagName("Relationship"):
            target = rel.getAttribute("Target")
            if target.startswith('/'):
                # Absolute target: relative to the archive root.
                target = target.lstrip('/')
            else:
                # Relative target: relative to the workbook part in xl/.
                target = 'xl/' + target
            targets[rel.getAttribute("Id")] = target

    workbook = xml.dom.minidom.parseString(archive.read('xl/workbook.xml'))
    entries = []
    for sheet in workbook.getElementsByTagName("sheet"):
        path = targets.get(sheet.getAttribute("r:id"))
        if path not in names:
            path = 'xl/worksheets/sheet' + sheet.getAttribute("sheetId") + '.xml'
        entries.append((sheet.getAttribute("name"), path))
    return entries


def cell_value(cell, shared_strings):
    """Return the text of a <c> element, or None if the cell holds no value.

    A missing "t" attribute means the default type "n" (number).  Types that
    are not recognised are reported on stdout and their raw <v> text is used.
    """
    cell_type = cell.getAttribute("t") or 'n'

    if cell_type == 'inlineStr':
        # Inline strings live in an <is> child instead of <v>.
        inline = cell.getElementsByTagName("is")
        return node_text(inline[0]) if inline else None

    holders = cell.getElementsByTagName("v")
    if not holders:
        return None
    raw = direct_text(holders[0])
    if not raw:
        return None

    if cell_type == 's':
        # The value is an index into the shared string table.
        return shared_strings[int(raw)]
    if cell_type not in RAW_VALUE_TYPES:
        print("Unknown type: " + cell_type)
    return raw


def read_sheet(sheet_doc, shared_strings):
    """Collect the cell values of a parsed worksheet document.

    Returns (data, max_col, max_row) where *data* maps (col, row) to the
    cell text, *max_col* is the largest zero-based column index seen and
    *max_row* the largest one-based row number seen.  Cells without a value
    still count towards the extent of the sheet.
    """
    data = {}
    max_col = 0
    max_row = 0
    for cell in sheet_doc.getElementsByTagName("c"):
        col, row = parse_cell_ref(cell.getAttribute("r"))
        max_col = max(max_col, col)
        max_row = max(max_row, row)
        value = cell_value(cell, shared_strings)
        if value is not None:
            data[col, row] = value
    return data, max_col, max_row


def format_rows(data, max_col, max_row):
    """Render the sheet as one comma-separated string per row.

    Every row has max_col + 1 fields; cells missing from *data* are empty.
    Values are written as-is (no quoting of embedded commas).
    """
    return [
        ",".join(data.get((col, row), "") for col in range(max_col + 1))
        for row in range(1, max_row + 1)
    ]


def main(path='DB2.xlsx'):
    """Print every sheet of the workbook at *path* as comma-separated rows."""
    with zipfile.ZipFile(path, 'r') as archive:
        shared_strings = load_shared_strings(archive)
        for name, sheet_path in sheet_entries(archive):
            sheet_doc = xml.dom.minidom.parseString(archive.read(sheet_path))
            data, max_col, max_row = read_sheet(sheet_doc, shared_strings)
            print("----" + name + "----")
            for line in format_rows(data, max_col, max_row):
                print(line)
            # Blank line between sheets.
            print("")


if __name__ == "__main__":
    main()