This repository has been archived on 2025-11-02. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
small-projects/huffman-coding.py
2020-02-27 08:46:53 +00:00

108 lines
3.6 KiB
Python

# import module to sort characters in order of frequency
from collections import Counter
# function to find path of desired character
def find_node(nodes, target_character):
for index, item in enumerate(nodes):
if item == target_character:
# return its path
return [str(index)]
if isinstance(item, (list, tuple)):
# run this function again to dig further into the nested items
path = find_node(item, target_character)
# if desired character is found, return its path
if path:
return [str(index)] + path
# if desired character not found, return empty list
return []
def encode(txt):
# list of tuples in descending order of frequency: (character, frequency)
info = Counter(txt).most_common()
# change the list into ascending order
info.reverse()
# list for character tuples
nodes = []
# list for node usage frequencies
frequencies = []
# copy nodes and their usage frequencies to the dedicated lists
for item in info:
nodes.append(item[0])
frequencies.append(item[1])
# repeat until only one top-level node exists
while len(nodes) > 2:
# combine two least frequent characters' nodes into a new tuple node, containing the old nodes (old_node_1, old_node_2)
new_node = (nodes[0], nodes[1])
# combine two least frequent characters' frequencies into a total frequency, to be used at the top level of the list of nodes
new_frequency = frequencies[0] + frequencies[1]
print(f"New node: {new_node}, new frequency: {new_frequency}")
print(f"Nodes to be removed: {nodes[0:2]}, frequencies to be removed: {frequencies[0:2]}")
# remove nodes that have been nested inside the new node
del nodes[0:2]
# remove frequencies that have been summed and added to the new frequency
del frequencies[0:2]
# find index of the last node with a frequency below that of the new node
# if the largest frequency is smaller than the new one, place the new node at the end of the list
if (frequencies[-1] < new_frequency):
print("i = -1")
i = len(frequencies)
else:
# loop over every frequency
for index, frequency in enumerate(frequencies):
if (frequency >= new_frequency):
print(f"{frequency} >= {new_frequency}. Breaking. Index is {index}")
# record the index to insert the frequency at
i = index
break
# insert the new node and frequency in their rightful positions, maintaining ascending order of frequency
nodes.insert(i, new_node)
frequencies.insert(i, new_frequency)
print(f"Nodes: {nodes}, frequencies: {frequencies}")
print(f"Nodes: {nodes}")
# encrypted text
output = ""
# characters and encoded values
encoded_values = {}
for char in set(txt):
# find the character's path and add it to the dictionary
encoded_values[char] = "".join(find_node(nodes, char))
print(f"Encoded values: {encoded_values}")
for char in txt:
# add text to final output
output += encoded_values[char]
print(f"Encoded message: {output}")
return [output, nodes]
def decode(txt, nodes):
# string to hold decoded message
output = ""
# replicate nodes, for looping over and editing with the first part of the code
node = nodes
for digit in txt:
# get the node with the corresponding index
node = node[int(digit)]
# if the retrieved node isn't a tuple (i.e. it isn't a parent node)
if (not isinstance(node, tuple)):
output += node
# replace the retrieved node with the whole tree again, for looping over and editing with the next part of the code
node = nodes
print(f"Decoded message: {output}")
return output
encoding = encode(input("Text: "))
decode(encoding[0], encoding[1])