summaryrefslogtreecommitdiffstats
path: root/xmldiff.py
blob: fdeb8b16aa670edc5eb08877b1a00e8ff7d064aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/python

import xml.etree.ElementTree
from sys import argv, exit

if len(argv) < 3:
    # Both file paths are required. print is called with exactly one string
    # in parentheses so the line works both as a Python 2 print statement
    # and as a Python 3 function call.
    print('Usage: xmldiff.py <file1> <file2>')
    exit(1)

# Helper functions to reduce duplication
def ensureKey(dictionary, key, init):
    """Make sure `key` is present in `dictionary`.

    When the key is missing, `init` is called without arguments and its
    result is stored under `key`. An existing entry is left untouched and
    `init` is not called at all.
    """
    if key in dictionary:
        return
    dictionary[key] = init()

def add(dictionary, key, value, index):
    """Store `value` in slot `index` of the two-element list kept under `key`.

    A key that is not present yet is first initialised to `[None, None]`,
    so the slot that is not written stays `None` until a later call fills it.
    """
    def emptyPair():
        return [None, None]

    ensureKey(dictionary, key, emptyPair)
    pair = dictionary[key]
    pair[index] = value

def _loadEvents(path):
    """Parse the XML file at `path` and return the <events> child of its root.

    Exits with an error message when the root has no direct <events> child,
    rather than failing later with a TypeError when None is iterated.
    """
    events = xml.etree.ElementTree.parse(path).getroot().find('events')
    if events is None:
        # exit() called with a string reports it on stderr and terminates
        # with a non-zero status.
        exit(path + ': no <events> element below the document root')
    return events


def _handleOf(event, path):
    """Return the text of the <handle> child of `event`.

    Exits with an error message when the child is missing or has no text;
    a None key could not be printed in the report anyway.
    """
    handle = event.find('handle')
    if handle is None or handle.text is None:
        exit(path + ': found an event without a <handle> value')
    return handle.text


ours = _loadEvents(argv[1])
theirs = _loadEvents(argv[2])

# Collect every event on either side, keyed by its handle text. Slot 0 of
# each entry holds the event from the first file, slot 1 the event from the
# second file. If a handle occurs more than once in the same file, the last
# event with that handle replaces the earlier ones.
byHandle = {}
for event in ours:
    add(byHandle, _handleOf(event, argv[1]), event, 0)

for event in theirs:
    add(byHandle, _handleOf(event, argv[2]), event, 1)

# Handles grouped by outcome -- 0: only ours, 1: only theirs, 2: identical
results = [[], [], []]
# handle -> list of descriptions of the subelements that do not match
differences = {}
for key, value in byHandle.items():
    # value is [event from ours, event from theirs]. A None slot means that
    # side has no event with this handle, so the event belongs to the
    # *other* side.
    if value[1] is None:
        results[0].append(key)
        continue
    if value[0] is None:
        results[1].append(key)
        continue

    # Event exists on both sides: compare each subelement by its serialised
    # form. Subelements are keyed by tag, so when an event has several
    # children with the same tag only the last one takes part in the
    # comparison.
    subelements = {}
    for se in value[0]:
        add(subelements, se.tag, xml.etree.ElementTree.tostring(se), 0)

    for se in value[1]:
        add(subelements, se.tag, xml.etree.ElementTree.tostring(se), 1)

    found = []
    for sekey, sevalue in subelements.items():
        if sevalue[0] is None:
            # absent from the first file, hence present only in the second
            found.append(sekey + ' only in ' + argv[2])
        elif sevalue[1] is None:
            # absent from the second file, hence present only in the first
            found.append(sekey + ' only in ' + argv[1])
        elif sevalue[0] != sevalue[1]:
            found.append(sekey + ' differs')

    if found:
        differences[key] = found
    else:
        results[2].append(key)

def _printGroup(title, handles):
    """Print `title` followed by the handle count, then one handle per line."""
    print(title + ' (' + str(len(handles)) + '):')
    for handle in handles:
        print('  ' + handle)


# Print results in groups. Every print call receives exactly one string in
# parentheses, which produces the same output whether print is a statement
# (Python 2) or a function (Python 3); print('') emits the blank separator.
_printGroup('Elements only in ' + argv[1], results[0])
print('')

_printGroup('Elements only in ' + argv[2], results[1])
print('')

_printGroup('Elements identical in both', results[2])
print('')

# Entries whose difference list is empty are not reported and not counted.
differing = [(e, d) for e, d in differences.items() if d]
print('Elements with differences (' + str(len(differing)) + '):')
for e, d in differing:
    print('  ' + e)
    for s in d:
        print('    ' + s)
    print('')