108 lines
3.6 KiB
Python
108 lines
3.6 KiB
Python
# import module to sort characters in order of frequency
|
|
from collections import Counter
|
|
|
|
# function to find path of desired character
|
|
def find_node(nodes, target_character):
|
|
for index, item in enumerate(nodes):
|
|
if item == target_character:
|
|
# return its path
|
|
return [str(index)]
|
|
if isinstance(item, (list, tuple)):
|
|
# run this function again to dig further into the nested items
|
|
path = find_node(item, target_character)
|
|
# if desired character is found, return its path
|
|
if path:
|
|
return [str(index)] + path
|
|
# if desired character not found, return empty list
|
|
return []
|
|
|
|
def encode(txt):
|
|
# list of tuples in descending order of frequency: (character, frequency)
|
|
info = Counter(txt).most_common()
|
|
# change the list into ascending order
|
|
info.reverse()
|
|
# list for character tuples
|
|
nodes = []
|
|
# list for node usage frequencies
|
|
frequencies = []
|
|
|
|
# copy nodes and their usage frequencies to the dedicated lists
|
|
for item in info:
|
|
nodes.append(item[0])
|
|
frequencies.append(item[1])
|
|
|
|
# repeat until only one top-level node exists
|
|
while len(nodes) > 2:
|
|
# combine two least frequent characters' nodes into a new tuple node, containing the old nodes (old_node_1, old_node_2)
|
|
new_node = (nodes[0], nodes[1])
|
|
# combine two least frequent characters' frequencies into a total frequency, to be used at the top level of the list of nodes
|
|
new_frequency = frequencies[0] + frequencies[1]
|
|
|
|
print(f"New node: {new_node}, new frequency: {new_frequency}")
|
|
print(f"Nodes to be removed: {nodes[0:2]}, frequencies to be removed: {frequencies[0:2]}")
|
|
|
|
# remove nodes that have been nested inside the new node
|
|
del nodes[0:2]
|
|
# remove frequencies that have been summed and added to the new frequency
|
|
del frequencies[0:2]
|
|
|
|
# find index of the last node with a frequency below that of the new node
|
|
|
|
# if the largest frequency is smaller than the new one, place the new node at the end of the list
|
|
if (frequencies[-1] < new_frequency):
|
|
print("i = -1")
|
|
i = len(frequencies)
|
|
else:
|
|
# loop over every frequency
|
|
for index, frequency in enumerate(frequencies):
|
|
if (frequency >= new_frequency):
|
|
print(f"{frequency} >= {new_frequency}. Breaking. Index is {index}")
|
|
# record the index to insert the frequency at
|
|
i = index
|
|
break
|
|
|
|
# insert the new node and frequency in their rightful positions, maintaining ascending order of frequency
|
|
nodes.insert(i, new_node)
|
|
frequencies.insert(i, new_frequency)
|
|
print(f"Nodes: {nodes}, frequencies: {frequencies}")
|
|
|
|
print(f"Nodes: {nodes}")
|
|
|
|
# encrypted text
|
|
output = ""
|
|
|
|
# characters and encoded values
|
|
encoded_values = {}
|
|
|
|
for char in set(txt):
|
|
# find the character's path and add it to the dictionary
|
|
encoded_values[char] = "".join(find_node(nodes, char))
|
|
|
|
print(f"Encoded values: {encoded_values}")
|
|
|
|
for char in txt:
|
|
# add text to final output
|
|
output += encoded_values[char]
|
|
|
|
print(f"Encoded message: {output}")
|
|
return [output, nodes]
|
|
|
|
def decode(txt, nodes):
|
|
# string to hold decoded message
|
|
output = ""
|
|
# replicate nodes, for looping over and editing with the first part of the code
|
|
node = nodes
|
|
for digit in txt:
|
|
# get the node with the corresponding index
|
|
node = node[int(digit)]
|
|
# if the retrieved node isn't a tuple (i.e. it isn't a parent node)
|
|
if (not isinstance(node, tuple)):
|
|
output += node
|
|
# replace the retrieved node with the whole tree again, for looping over and editing with the next part of the code
|
|
node = nodes
|
|
|
|
print(f"Decoded message: {output}")
|
|
return output
|
|
|
|
encoding = encode(input("Text: "))
|
|
decode(encoding[0], encoding[1]) |