# penntreebank_inspect.py

import sys, cPickle
import matplotlib.pyplot as plt

def split_path(pathlike):
    """Split an enumerated ``[label:]path`` argument into ``(name, path)``.

    ``pathlike`` is an ``(index, spec)`` pair, as produced by ``enumerate``.
    If ``spec`` is a string containing a colon, everything before the first
    colon is the name and everything after it is the path, so the path
    itself may contain further colons.  If ``spec`` has no colon, or is not
    a string at all, the index serves as the name and ``spec`` is returned
    unchanged as the path.

    The resolved pair is printed to stdout before being returned.
    """
    i, pathlike = pathlike
    try:
        # Split on the first colon only: without the limit, a labeled path
        # that contains another colon would raise ValueError here and be
        # treated as an unlabeled path, label included.
        name, path = pathlike.split(":", 1)
    except (ValueError, AttributeError):
        # ValueError: no colon present; AttributeError: spec is not a string.
        name, path = i, pathlike
    print("%s: %s" % (name, path))
    return name, path

def load_instance(pathlike):
    """Unpickle one result file and tag it with its name and path.

    ``pathlike`` is passed to ``split_path`` to obtain the name and the file
    path.  The unpickled object is expanded as keyword arguments next to
    ``name`` and ``path``, so it has to be a mapping with string keys that
    does not already contain either of those two keys.
    """
    label, location = split_path(pathlike)
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(location, "rb") as handle:
        payload = cPickle.load(handle)
    return dict(name=label, path=location, **payload)

# Command-line arguments: paths to pickle files generated by
# penntreebank_evaluate.py, each optionally prefixed with a label, in the
# form [label:]path.
paths = sys.argv[1:]
# Every path is handed over together with its position on the command line.
instances = [load_instance(numbered) for numbered in enumerate(paths)]

import math
def natstobits(x):
    """Convert a quantity expressed in nats to bits by dividing by ln(2)."""
    nats_per_bit = math.log(2)
    return x / nats_per_bit

# One figure per data split.  Within a figure each instance gets its own
# color; inference results are drawn solid and training results dashed.
colors = "blue red green cyan magenta yellow black white".split()
for which_set in "train valid test".split():
    plt.figure()
    for situation, kwargs in [("inference", dict(linestyle="solid")),
                              ("training",  dict(linestyle="dashed"))]:
        # zip() stops at the shorter sequence, so instances beyond the
        # number of available colors are not plotted.
        for color, instance in zip(colors, instances):
            name = instance["name"]

            # baseline training/inference performances will be identical,
            # so the baseline is only drawn for the inference situation
            if situation == "training" and name == "LSTM":
                continue

            if name == "BN-LSTM":
                # say which statistics each of the two BN-LSTM curves uses
                statistics = dict(training="batch statistics",
                                  inference="population statistics")
                label = name + ", " + statistics[situation]
            else:
                label = name

            results = instance["results"][situation][which_set]
            lengths, entropies = zip(*[(length, result["cross_entropy"])
                                       for length, result in results.items()])

            # don't care about result of length 50 as we're training on 100 now
            assert lengths[0] == 50
            lengths = lengths[1:]
            entropies = entropies[1:]

            bits = [natstobits(nats) for nats in entropies]
            plt.plot(lengths, bits, label=label, c=color, linewidth=3, **kwargs)
            #plt.yscale("log")
    #plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.legend()
    #plt.title("performance on slices of the " + which_set + " string")
    plt.xlabel("sequence length")
    plt.ylabel("mean bits per character")

# Report the bits per character each instance reaches on the full test set
# (its "proper_test" cross entropy, converted from nats to bits).  The
# single-argument print(...) form matches the one used in split_path: under
# Python 2 it prints the same text as the print statement did, and unlike
# the statement it also parses under Python 3.
for instance in instances:
    print("bpc on full test %s %s" % (
        instance["name"],
        natstobits(instance["results"]["proper_test"]["cross_entropy"])))

# Stop in the debugger so the loaded results can be inspected interactively;
# the figures are shown once the debugger session is continued.
import pdb
pdb.set_trace()
plt.show()

# Disabled visualization: flip the condition to draw, for every instance, one
# image per entry of its "new_popstats" mapping, then break into the debugger
# and show the figures.
if False:
    for instance in instances:
        for variable, value in instance["new_popstats"].items():
            plt.figure()
            plt.imshow(value, cmap="bone", aspect="auto")
            plt.colorbar()
            heading = "%s %s" % (instance["name"], variable.name)
            plt.title(heading)
        import pdb
        pdb.set_trace()
        plt.show()

# Final breakpoint: keeps the session open for inspection before the script ends.
import pdb
pdb.set_trace()