#voc = [1, 1, 1, 1, 1, 1, 1, 1] ## complete tree
voc = [4, 3, 2, 1] ## single path tree
voc_size = len(voc)
count = voc + [1e5 for _ in range(voc_size + 1)]
binary = [0 for _ in range(voc_size * 2 + 1)]
parent_node = [0 for _ in range(voc_size * 2 + 1)]
print count
[4, 3, 2, 1, 100000.0, 100000.0, 100000.0, 100000.0, 100000.0]
pos1, pos2 = voc_size - 1, voc_size
for i in range(0, voc_size-1):
if pos1 >= 0:
if count[pos1] < count[pos2]:
min1i = pos1
pos1 -= 1
else:
min1i = pos2
pos2 += 1
else:
min1i = pos2
pos2 += 1
if pos1 >= 0:
if count[pos1] < count[pos2]:
min2i = pos1
pos1 -= 1
else:
min2i = pos2
pos2 += 1
else:
min2i = pos2
pos2 += 1
count[voc_size + i] = count[min1i] + count[min2i]
parent_node[min1i] = voc_size + i
parent_node[min2i] = voc_size + i
## implicitly binary[min1i] = 0
binary[min2i] = 1
print count
print binary
[4, 3, 2, 1, 3, 6, 10, 100000.0, 100000.0] [0, 1, 1, 0, 0, 1, 0, 0, 0]
So it proves that only 2 voc_size - 1 nodes will be needed to construct a tree, even the tree is complete*
point = [0 for _ in range(40)]
code = [0 for _ in range(40)]
for a in range(voc_size):
b = a
i = 0
while True:
code[i] = binary[b]
point[i] = b
i += 1
b = parent_node[b]
if (b == voc_size * 2 - 2):
break
print code
print point
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]