#!/usr/bin/python
#
# Compares two JSON files (e.g. base.json and new.json) and outputs a
# resulting JSON document (e.g. diff.json) which can be merged with
# base.json to re-create new.json.
# The base JSON is read from stdin, the first parameter is the new
# JSON file to compare against, and the result is written to
# stdout.
#
# Example usage:
#
# cat gamedb.json | ./DiffGameDB.py updatedb.json > patch.json
#
import sys, json
# Exactly one argument is expected: the path of the new json file.
if len(sys.argv) != 2:
    # Diagnostics go to stderr because stdout carries the generated patch,
    # and a usage error must not look like success to the calling shell.
    sys.stderr.write("Incorrect number of parameters\n")
    sys.exit(1)

# The base database is piped in on stdin.
baseData = json.load(sys.stdin)

# Parse the new database; the handle is closed as soon as parsing is done.
with open(sys.argv[1], 'rb') as newFile:
    newData = json.load(newFile)

# Patch being built: table name -> list of added rows / row diffs.
diffData = {}
def ConvertListToDict(obj):
    """Return a dict mapping each index of *obj*, as a string, to its item.

    Example: ['a', 'b'] becomes {'0': 'a', '1': 'b'}.
    """
    return {str(index): item for index, item in enumerate(obj)}
# Idea from stackoverflow.com #5903720
def CompareRows(val1, val2):
    """Return True when the two dicts hold the same keys and equal values.

    Values that compare unequal are only examined further when both are
    of the same container type (dict or list); lists are compared through
    their index-keyed dict form. Any other mismatch yields False.
    """
    # Every key of val1 must also exist in val2.
    if any(key not in val2 for key in val1):
        return False

    for key in val2:
        # ... and every key of val2 must exist in val1.
        if key not in val1:
            return False

        left, right = val1[key], val2[key]
        if left != right:
            right_type = type(right)
            # A differing scalar, or a change of type, is a mismatch.
            if right_type not in (dict, list) or type(left) != right_type:
                return False
            if right_type == dict:
                if not CompareRows(left, right):
                    return False
            else:
                # Both are lists: compare them element by element.
                if not CompareRows(ConvertListToDict(left),
                                   ConvertListToDict(right)):
                    return False
    return True
def DiffRows(val1, val2, diff):
    """Collect into *diff* what is needed to turn dict *val1* into *val2*.

    Parameters:
        val1: the original (base) dict.
        val2: the new dict.
        diff: a dict supplied by the caller, filled in place.

    Returns:
        True when no difference was found (diff is left untouched),
        False when at least one difference was recorded in diff.

    What gets recorded:
        * a key of val1 missing from val2: diff becomes a copy of the
          whole of val2 at this level;
        * a key only in val2: the new value;
        * a changed scalar, or a value whose type changed: the new value;
        * a changed nested dict: the nested diff, computed recursively;
        * a changed list: the whole new list.
    """
    # Something was removed: take the complete new content of this level.
    # The caller's dict has to be filled in place; rebinding the local
    # name would leave the caller with an empty diff.
    if any(key not in val2 for key in val1):
        diff.clear()
        diff.update(val2)
        return False

    res = True
    for key in val2:
        new_value = val2[key]

        if key not in val1:
            # Something was added: record it and keep looking for more.
            diff[key] = new_value
            res = False
            continue

        old_value = val1[key]
        if old_value == new_value:
            continue

        if (type(old_value) != type(new_value)
                or not isinstance(new_value, (dict, list))):
            # A scalar changed, or the type changed. The new value is taken
            # as is; recursing into mismatched types would either fail or
            # overwrite the value just recorded.
            diff[key] = new_value
            res = False
        elif isinstance(new_value, dict):
            nested = {}
            if not DiffRows(old_value, new_value, nested):
                diff[key] = nested
                res = False
        else:
            # Two lists that compare unequal: take the whole new list to
            # keep the patch robust, at the cost of a slightly bigger diff.
            diff[key] = new_value
            res = False
    return res
def _Abort(message):
    """Report a fatal problem on stderr and stop with a failing exit status."""
    # stdout is reserved for the generated patch, so diagnostics must not
    # be written there.
    sys.stderr.write(message + '\n')
    sys.exit(1)


# Only tables and rows present in the new data are visited; rows or tables
# that exist solely in the base data are not reported.
for tableName in newData:
    if tableName not in baseData:
        _Abort('Adding a new table??!! ABORTING. New table: ' + tableName)

    baseTable = baseData[tableName]
    for row in newData[tableName]:
        if 'uid' not in row:
            _Abort('Unexpected database table without a UID column. ABORTING. Row: '
                   + str(row) + ' from table: ' + tableName)

        uid = row['uid']
        # All base rows sharing this uid; exactly one is expected.
        matches = [baseRow for baseRow in baseTable if baseRow['uid'] == uid]

        if not matches:
            sys.stderr.write('new row added: ' + str(row) + '\n')
            # The table may not have an entry in the patch yet.
            diffData.setdefault(tableName, []).append(row)
        elif len(matches) == 1:
            diff = {}
            if not DiffRows(matches[0], row, diff):
                # NOTE(review): the entry holds whatever DiffRows collected,
                # which typically does not include 'uid' -- confirm the
                # merge step can still locate the row to patch.
                diffData.setdefault(tableName, []).append(diff)
        else:
            # str() because the uid is not necessarily a string.
            _Abort('ABORTING. Found two rows with same UID. uid: ' + str(uid)
                   + ' from table: ' + tableName)

# The patch is the only thing written to stdout.
print(json.dumps(diffData))