diff --git a/huffman-coding.py b/huffman-coding.py new file mode 100644 index 0000000..141525a --- /dev/null +++ b/huffman-coding.py @@ -0,0 +1,77 @@ +from collections import Counter + +# function to find path of desired character +def find_node(nodes, target_character): + # for each item in nodes + for index, item in enumerate(nodes): + # if item is desired character, return the path to it + if item == target_character: + return [str(index)] + # if the item is a list or tuple + if isinstance(item, (list, tuple)): + # run this function again to dig further into the nested items + path = find_node(item, target_character) + # if desired character is found, return its path + if path: + return [str(index)] + path + # if desired character not found, return empty list + return [] + +# retrieve text to encode +txt = input("Text: ") + +# create list of tuples in descending order of frequency: (character, frequency) +info = Counter(txt).most_common() +# change the list into ascending order +info.reverse() +# create list for character tuples +nodes = [] +# create list for node usage frequencies +frequencies = [] + +# copy nodes and their usage frequencies to the dedicated lists +for item in info: + nodes.append(item[0]) + frequencies.append(item[1]) + +# repeat until only one top-level node exists +while len(nodes) > 1: + # combine two least frequent characters' nodes into a new tuple node, containing the old nodes (old_node_1, old_node_2) + new_node = (nodes[0], nodes[1]) + # combine two least frequent characters' frequencies into a total frequency, to be used at the top level of the list of nodes + new_frequency = frequencies[0] + frequencies[1] + # remove nodes that have been nested inside the new node + del nodes[0:2] + # remove frequencies that have been summed and added to the new frequency + del frequencies[0:2] + # find index of last node with frequency below that of the new node + i = 0 + # if there are more nodes to compare to + if (len(nodes) > 0): + # if the largest frequency is smaller than the new one, place the new node at the end of the list + if (frequencies[-1] < new_frequency): + i = -1 + # else, if the first frequency is larger than the new one + elif not (frequencies[i] >= new_frequency): + # find the largest frequency that is smaller than the new one + while (frequencies[i] < new_frequency): + i += 1 + # insert the new node in its rightful position, maintaining ascending order of frequency + nodes.insert(i, new_node) + # insert the new frequency in its rightful position, maintaining ascending order of frequency + frequencies.insert(i, new_frequency) + +# print all nodes +print("Nodes: %s" % nodes) + +# encrypted text +output = "" + +# for every character in the text to be encrypted +for char in txt: + # find its path and add it to the encrypted text + output += "".join(find_node(nodes, char)) + +# print encrypted message + +print("Encrypted message: %s" % output)