Commit

Adding LZ->Huffman encoding result
jzharris committed Mar 22, 2019
1 parent b07d7ba commit 834cb26
Showing 6 changed files with 1,213 additions and 221 deletions.
7 changes: 7 additions & 0 deletions main/Encoding/config_encode_yolo.json
@@ -13,6 +13,13 @@
         "verbose": true
     },
 
+    "lz_huff": {
+        "input_checkpoint": "lp_seg_mobilenet_quant_it2",
+        "encoded_name": "encoded_mobilenet_quant_it2",
+        "white_regex": ["CustomAdam", "training", "loss", "moving_mean", "moving_variance"],
+        "verbose": true
+    },
+
     "convert": {
         "convert_dir": "../LP_segmentation/quant_models/"
     }
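Note: despite the name, the white_regex entries are matched by plain substring containment against each variable name (the `if regex in v.name` test in encode_network.py), not as compiled regular expressions. A minimal sketch of that filtering, using hypothetical variable names:

import json

# hypothetical variable names, standing in for tf.GraphKeys.GLOBAL_VARIABLES entries
var_names = ['conv1/weights:0', 'conv1/moving_mean:0', 'CustomAdam/beta_1:0']

with open('config_encode_yolo.json') as f:
    white_regex = json.load(f)['lz_huff']['white_regex']

for name in var_names:
    skipped = any(pattern in name for pattern in white_regex)
    print('{} -> {}'.format(name, 'skip' if skipped else 'encode'))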
64 changes: 64 additions & 0 deletions main/Encoding/encode_lz_huffman.py
@@ -0,0 +1,64 @@
import argparse
import os
import json
import tensorflow as tf

from encode_network import encode_lz, encode_huff

##########################################################################################################
# run: python encode_lz_huffman.py -c config_encode_yolo.json 2>&1 | tee logs_lz_huffman.txt
##########################################################################################################

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

argparser = argparse.ArgumentParser(
    description='Train and validate YOLO_v2 model on any dataset')

argparser.add_argument(
    '-c',
    '--conf',
    help='path to configuration file')


def _main_(args):
    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    # skip specific types of variables/layers
    white_regex = config['lz_huff']['white_regex']

    # checkpoint paths
    input_checkpoint = config['lz_huff']['input_checkpoint']
    encoded_name = config['lz_huff']['encoded_name']
    verbose = config['lz_huff']['verbose']

    # output paths
    parent_folder = config['convert']['convert_dir']
    output_folder = "converted_checkpoint"
    output_path = os.path.join(parent_folder, output_folder)

    if not os.path.exists(output_path):
        raise Exception("ERROR: converted checkpoint not found at {}".format(output_path))

    ###############################
    #   Load the model and encode
    ###############################

    with tf.Session() as sess:
        new_saver = tf.train.import_meta_graph(os.path.join(output_path, '{}.ckpt.meta'.format(input_checkpoint)))
        new_saver.restore(sess, tf.train.latest_checkpoint(output_path))

        # first stage of encoding: LZ
        codes = encode_lz(sess, white_regex, verbose)
        # print(codes)

        # second stage of encoding: Huffman
        encode_huff(sess, codes=codes)


if __name__ == '__main__':
    args = argparser.parse_args()
    _main_(args)
130 changes: 85 additions & 45 deletions main/Encoding/encode_network.py
@@ -44,7 +44,7 @@ def get_freqs(self):
 
         # return unique, freqs
         freqs_d = dict(zip(unique, freqs))
-        pprint(freqs_d)
+        # pprint(freqs_d)
         return freqs_d
 
     def make_heap(self, frequency):
@@ -80,7 +80,7 @@ def make_codes(self):
         self.make_codes_helper(root, current_code)
         # pprint(self.codes)
 
-    def make_stats(self):
+    def make_stats(self, dtype='float'):
         codes = self.codes
 
         # count how many floats needed for codebook
@@ -91,65 +91,96 @@
         for val in self.val_np.flatten():
             encoded_size += len(codes[val])
 
-        print('>>> {} 32-bit floating point numbers needed for codebook'.format(codebook_size))
+        print('>>> {} {} numbers needed for codebook'.format(codebook_size,
+                                                             '32-bit floating point' if dtype=='float' else 'uint8'))
         print('>>> {} bits needed for encoded variables'.format(encoded_size))
 
         return codebook_size, encoded_size
 
 
-def encode_huff(sess, white_regex=None, verbose=True):
+def encode_huff(sess, codes=None, white_regex=None, verbose=True):
     print('Encoding network...')
 
-    codebook_sizes = []
-    encoded_bits = []
-
-    all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
-    weight_count = 0
-    layer_count = 0
-    for v in (tqdm(all_vars) if not verbose else all_vars):
-        skip = False
-        for regex in white_regex:
-            if regex in v.name:
-                skip = True
-        if skip:
-            if verbose:
-                print('>>> skipping {}, part of whitelist'.format(v.name))
-                sys.stdout.flush()
-        else:
-            if verbose:
-                print('>>> encoding {}'.format(v.name))
-                sys.stdout.flush()
-
-            # perform the encoding for the layer
-            val_np = sess.run(v)
-            encoder = HuffmanCoding(val_np)
-            codebook_size, encoded_size = encoder.encode_np()
-            codebook_sizes.append(codebook_size)
-            encoded_bits.append(encoded_size)  # number of bits needed to represent this layer
-
-            # increment weight count by number of weights in layer
-            layer_weights = len(val_np.flatten())
-            weight_count += layer_weights
-            layer_count += 1
-
-    print(">>>")
-    print(">>> encoded a total of {} layers, and {} weights".format(layer_count, weight_count))
-    print(">>> a total of {} codebooks containing {} 32-bit floating point numbers ({:.2f} KB) was created".
-          format(len(codebook_sizes), sum(codebook_sizes), sum(codebook_sizes) * 32 / 8000))
-    print(">>> a total of {:.2f} KB are required to store the encoded variables ({:.2f} KB per layer on average)".
-          format(sum(encoded_bits)/8000, np.average(np.array(encoded_bits))/8000))
-    original_kb = weight_count * 32 / 8000
-    print(">>> original number of bits needed: {:.2f} KB".format(original_kb))
-    new_kb = (sum(codebook_sizes) * 32 + sum(encoded_bits)) / 8000
-    print(">>> new number of bits needed: {:.2f} KB".format(new_kb))
-    print(">>> compression ratio: {:.4f}".format(original_kb / new_kb))
+    if white_regex is None:
+        white_regex = []
+
+    if codes is None:
+        codebook_sizes = []
+        encoded_bits = []
+        weight_count = 0
+        layer_count = 0
+
+        all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+
+        for v in (tqdm(all_vars) if not verbose else all_vars):
+            skip = False
+            for regex in white_regex:
+                if regex in v.name:
+                    skip = True
+            if skip:
+                if verbose:
+                    print('>>> skipping {}, part of whitelist'.format(v.name))
+                    sys.stdout.flush()
+            else:
+                if verbose:
+                    print('>>> encoding {}'.format(v.name))
+                    sys.stdout.flush()
+
+                # perform the encoding for the layer
+                val_np = sess.run(v)
+                encoder = HuffmanCoding(val_np)
+                codebook_size, encoded_size = encoder.encode_np()
+                codebook_sizes.append(codebook_size)
+                encoded_bits.append(encoded_size)  # number of bits needed to represent this layer
+
+                # increment weight count by number of weights in layer
+                layer_weights = len(val_np.flatten())
+                weight_count += layer_weights
+                layer_count += 1
+
+        print(">>>")
+        print(">>> encoded a total of {} layers, and {} weights".format(layer_count, weight_count))
+        print(">>> a total of {} codebooks containing {} 32-bit floating point numbers ({:.2f} KB) was created".
+              format(len(codebook_sizes), sum(codebook_sizes), sum(codebook_sizes) * 32 / 8000))
+        print(">>> a total of {:.2f} KB are required to store the encoded variables ({:.2f} KB per layer on average)".
+              format(sum(encoded_bits) / 8000, np.average(np.array(encoded_bits)) / 8000))
+        original_kb = weight_count * 32 / 8000
+        print(">>> original number of bits needed: {:.2f} KB".format(original_kb))
+        new_kb = (sum(codebook_sizes) * 32 + sum(encoded_bits)) / 8000
+        print(">>> new number of bits needed: {:.2f} KB".format(new_kb))
+        print(">>> compression ratio: {:.4f}".format(original_kb / new_kb))
+
+    else:  # assuming first compression is from LZ
+        codebook_sizes = []
+        encoded_bits = []
+        codes_count = 0
+
+        for v_name in (tqdm(codes.keys()) if not verbose else codes.keys()):
+            codes_count += len(codes[v_name])
+            encoder = HuffmanCoding(np.array(codes[v_name]))
+            codebook_size, encoded_size = encoder.encode_np()
+            codebook_sizes.append(codebook_size)
+            encoded_bits.append(encoded_size)  # number of bits needed to represent this layer
+
+        print(">>>")
+        print(">>> encoded a total of {} LZ codebooks, and {} LZ codes".format(len(codes.keys()), codes_count))
+        print(">>> a total of {} codebooks containing {} uint32 numbers ({:.2f} KB) was created".
+              format(len(codebook_sizes), sum(codebook_sizes), sum(codebook_sizes) * 32 / 8000))
+        print(">>> a total of {:.2f} KB are required to store the encoded variables ({:.2f} KB per layer on average)".
+              format(sum(encoded_bits) / 8000, np.average(np.array(encoded_bits)) / 8000))
+        original_kb = codes_count * 32 / 8000
+        print(">>> original number of bits needed to store LZ codes: {:.2f} KB".format(original_kb))
+        new_kb = (sum(codebook_sizes) * 32 + sum(encoded_bits)) / 8000
+        print(">>> new number of bits needed: {:.2f} KB".format(new_kb))
+        print(">>> compression ratio: {:.4f}".format(original_kb / new_kb))
 
 ########################################################################################################################
 
 class LempelZivCoding:
     def __init__(self, val_np):
         self.val_np = val_np
         self.combinations = {}
+        self.codes = None
 
     def encode_np(self):
         # gather statistics for each value in the np array
@@ -183,11 +214,16 @@ def make_codes(self):
                 code += 1
                 p = c
                 c = ''
+
+        if code > 2**32:  # assuming 32-bit numbers
+            raise Exception('code is: {}'.format(code))
+
         output_code.append(self.combinations[p])
         print(output_code)
+        self.codes = output_code
 
         codebook_size = code_length  # need to store the original unique elements
-        encoded_size = len(output_code) * 8  # need to store 8-bit uints for all the codes
+        encoded_size = len(output_code) * 32  # need to store uint32 variables for all the codes
 
         print('>>> {} 32-bit floating point numbers needed for codebook'.format(codebook_size))
         print('>>> {} bits needed for encoded variables'.format(encoded_size))
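For intuition, here is a toy LZW-style encoder in the spirit of LempelZivCoding above. This is an illustrative sketch, not the repository's exact implementation, which walks a flattened weight array and seeds its dictionary from the unique values:

def lzw_encode(values):
    # seed the dictionary with one entry per unique value
    combinations = {str(v): i for i, v in enumerate(dict.fromkeys(values))}
    code = len(combinations)
    output_code = []
    p = ''
    for v in values:
        c = p + str(v)
        if c in combinations:
            p = c
        else:
            output_code.append(combinations[p])
            combinations[c] = code
            code += 1
            p = str(v)
    output_code.append(combinations[p])
    return output_code, combinations

codes, book = lzw_encode([1, 1, 2, 1, 1, 2, 1, 1])
print(codes)  # [0, 0, 1, 2, 4, 0]: repeated runs collapse onto dictionary codes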
@@ -197,6 +233,7 @@
 def encode_lz(sess, white_regex=None, verbose=True):
     print('Encoding network...')
 
+    codes = {}
     codebook_sizes = []
     encoded_bits = []
 
@@ -223,6 +260,7 @@
             codebook_size, encoded_size = encoder.encode_np()
             codebook_sizes.append(codebook_size)
             encoded_bits.append(encoded_size)  # number of bits needed to represent this layer
+            codes[v.name] = encoder.codes
 
             # increment weight count by number of weights in layer
             layer_weights = len(val_np.flatten())
@@ -239,4 +277,6 @@
     print(">>> original number of bits needed: {:.2f} KB".format(original_kb))
     new_kb = (sum(codebook_sizes) * 32 + sum(encoded_bits)) / 8000
     print(">>> new number of bits needed: {:.2f} KB".format(new_kb))
-    print(">>> compression ratio: {:.4f}".format(original_kb / new_kb))
+    print(">>> compression ratio: {:.4f}".format(original_kb / new_kb))
+
+    return codes  # can be used to chain encoders
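The returned codes dict maps each variable name to its list of uint32 LZ codes; encode_lz_huffman.py forwards it into encode_huff's codes branch, where each stream is Huffman-coded. A sketch of that second stage's size accounting, using heapq directly rather than the repository's HuffmanCoding class:

import heapq
from collections import Counter

def huffman_bits(stream):
    # returns (total encoded bits, codebook entries) for one LZ code stream
    freqs = Counter(stream)
    if len(freqs) == 1:
        return len(stream), 1  # degenerate tree: a single 1-bit code
    # heap entries: (frequency, unique tiebreak, {symbol: code length so far})
    heap = [(f, i, {s: 0}) for i, (s, f) in enumerate(freqs.items())]
    heapq.heapify(heap)
    next_id = len(heap)
    while len(heap) > 1:
        f1, _, d1 = heapq.heappop(heap)
        f2, _, d2 = heapq.heappop(heap)
        merged = {s: d + 1 for s, d in d1.items()}
        merged.update({s: d + 1 for s, d in d2.items()})
        heapq.heappush(heap, (f1 + f2, next_id, merged))
        next_id += 1
    depths = heap[0][2]
    return sum(freqs[s] * d for s, d in depths.items()), len(freqs)

bits, entries = huffman_bits([0, 0, 1, 2, 4, 0])
print(bits, entries)  # 11 bits, 4 codebook entries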
