Import/research/ui/toolkit/convert_xlsx_to_sql.py at 968ecbf457fa6645a22c91c52e46b7adda1d700b

Fork: 0

jryland / Import

Find file

Newer

Older

Import / research / ui / toolkit / convert_xlsx_to_sql.py

John on 29 Dec 2020 2 KB bulk import from macbookpro checkouts

Raw Blame History

#!/usr/bin/python
import xml.dom.minidom
import zipfile
import re


# Namespace of the r:id attribute that links a <sheet> entry in workbook.xml
# to a relationship in xl/_rels/workbook.xml.rels.
_REL_NS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'

# A1-style cell reference: column letters followed by a row number.
_CELL_REF = re.compile(r'^\$?([A-Za-z]+)\$?(\d+)$')

# Cell types whose <v> text is emitted unchanged (number, formula string,
# boolean, error, date).
_RAW_VALUE_TYPES = ('n', 'str', 'b', 'e', 'd')


def _column_index(letters):
    """Return the zero-based column index for column letters.

    'A' -> 0, 'Z' -> 25, 'AA' -> 26, and so on (bijective base 26).
    """
    index = 0
    for letter in letters.upper():
        index = index * 26 + (ord(letter) - ord('A') + 1)
    return index - 1


def _node_text(node):
    """Return the character data stored directly inside ``node``.

    An empty element yields '' instead of failing on a missing child.
    """
    return "".join(child.data for child in node.childNodes
                   if child.nodeType in (child.TEXT_NODE,
                                         child.CDATA_SECTION_NODE))


def _string_item_text(item):
    """Return the complete text of a string item (<si> or <is>).

    Rich text is stored as several <t> runs, so all of them are joined.
    <t> elements inside <rPh> are phonetic hints, not cell text, and are
    skipped.
    """
    return "".join(_node_text(text) for text in item.getElementsByTagName("t")
                   if text.parentNode.tagName.split(":")[-1] != "rPh")


def _load_shared_strings(archive):
    """Return the shared string table as a list of strings.

    A workbook without any strings has no xl/sharedStrings.xml part; an
    empty list is returned in that case.
    """
    try:
        raw = archive.read('xl/sharedStrings.xml')
    except KeyError:
        return []
    table = xml.dom.minidom.parseString(raw)
    return [_string_item_text(item)
            for item in table.getElementsByTagName("si")]


def _sheet_members(archive):
    """Map relationship ids to archive member names for the workbook.

    Returns an empty dict when the relationships part is missing.
    """
    try:
        raw = archive.read('xl/_rels/workbook.xml.rels')
    except KeyError:
        return {}
    members = {}
    rels = xml.dom.minidom.parseString(raw)
    for rel in rels.getElementsByTagName("Relationship"):
        target = rel.getAttribute("Target")
        # Targets are relative to the xl/ folder unless they are absolute.
        if target.startswith('/'):
            member = target.lstrip('/')
        else:
            member = 'xl/' + target
        members[rel.getAttribute("Id")] = member
    return members


def _read_cells(sheet_dom, shared):
    """Collect the cell values of one worksheet.

    Returns ``(data, max_col, max_row)`` where ``data`` maps
    ``(col, row)`` (zero-based column, one-based row) to the cell text.

    Raises ValueError for a cell whose ``r`` attribute is not an A1-style
    reference.
    """
    data = {}
    max_col = 0
    max_row = 0
    for cell in sheet_dom.getElementsByTagName("c"):
        ref = cell.getAttribute("r")  # Cell position, e.g. "B7"
        match = _CELL_REF.match(ref)
        if match is None:
            raise ValueError("Unsupported cell reference: %r" % ref)
        col = _column_index(match.group(1))
        row = int(match.group(2))
        max_col = max(max_col, col)
        max_row = max(max_row, row)

        # The type attribute is optional; plain numbers usually omit it.
        cell_type = cell.getAttribute("t") or 'n'
        if cell_type == 'inlineStr':
            items = cell.getElementsByTagName("is")
            if not items:
                continue
            value = _string_item_text(items[0])
        else:
            values = cell.getElementsByTagName("v")
            if not values:
                # Cell carries formatting only; it still counts towards
                # the sheet extents but has nothing to print.
                continue
            value = _node_text(values[0])
            if cell_type == 's':
                # Shared string: <v> holds an index into the string table.
                value = shared[int(value)]
            elif cell_type not in _RAW_VALUE_TYPES:
                print("Unknown type: " + cell_type)
        data[col, row] = value
    return data, max_col, max_row


def iter_sheets(path):
    """Yield ``(sheet name, [line, ...])`` for every sheet in the workbook.

    Each line is one spreadsheet row with its cell values joined by commas.
    Values are emitted verbatim; a comma inside a value is not quoted.
    """
    with zipfile.ZipFile(path, 'r') as archive:
        shared = _load_shared_strings(archive)
        members = _sheet_members(archive)
        workbook = xml.dom.minidom.parseString(archive.read('xl/workbook.xml'))
        for sheet in workbook.getElementsByTagName("sheet"):
            name = sheet.getAttribute("name")
            rel_id = (sheet.getAttributeNS(_REL_NS, "id")
                      or sheet.getAttribute("r:id"))
            member = members.get(rel_id)
            if member is None:
                # No usable relationship: fall back to guessing the part
                # name from the sheet id.
                member = ('xl/worksheets/sheet'
                          + sheet.getAttribute("sheetId") + '.xml')
            sheet_dom = xml.dom.minidom.parseString(archive.read(member))
            data, max_col, max_row = _read_cells(sheet_dom, shared)
            lines = [",".join(data.get((col, row), "")
                              for col in range(max_col + 1))
                     for row in range(1, max_row + 1)]
            yield name, lines


def main(path='DB2.xlsx'):
    """Print every sheet of the workbook at ``path`` as comma-separated rows."""
    for name, lines in iter_sheets(path):
        print("----" + name + "----")
        for line in lines:
            print(line)
        print("")


if __name__ == '__main__':
    main()