diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 31e3812a7..e8d81727e 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -29,7 +29,7 @@ RUN apt-get -qq update && apt-get -qq -y install curl bzip2 \ && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log # Install LogicNets system prerequisites -RUN apt-get -qq update && apt-get -qq -y install verilator build-essential libx11-6 git \ +RUN apt-get -qq update && apt-get -qq -y install verilator build-essential libx11-6 git libreadline-dev \ && apt-get autoclean \ && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log @@ -41,6 +41,16 @@ ENV OHMYXILINX=/workspace/oh-my-xilinx RUN git clone https://github.com/dirjud/Nitro-Parts-lib-Xilinx.git ENV NITROPARTSLIB=/workspace/Nitro-Parts-lib-Xilinx +# Adding LogicNets dependency on ABC +COPY examples/mnist/abc.patch /workspace/ +RUN git clone https://github.com/berkeley-abc/abc.git \ + && cd abc \ + && git checkout 813a0f1ff1ae7512cb7947f54cd3f2ab252848c8 \ + && git apply /workspace/abc.patch \ + && rm -f /workspace/abc.patch \ + && make -j`nproc` +ENV ABC_ROOT=/workspace/abc + # Create the user account to run LogicNets RUN groupadd -g $GID $GNAME RUN useradd -m -u $UID $UNAME -g $GNAME diff --git a/examples/cybersecurity/dump_luts.py b/examples/cybersecurity/dump_luts.py new file mode 100644 index 000000000..01a57663f --- /dev/null +++ b/examples/cybersecurity/dump_luts.py @@ -0,0 +1,119 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from argparse import ArgumentParser + +import torch +from torch.utils.data import DataLoader + +from logicnets.nn import generate_truth_tables, \ + lut_inference, \ + save_luts, \ + module_list_to_verilog_module + +from train import configs, model_config, dataset_config, other_options, test +from dataset import get_preqnt_dataset +from models import UnswNb15NeqModel, UnswNb15LutModel + +if __name__ == "__main__": + parser = ArgumentParser(description="Generate histograms of states used throughout LogicNets") + parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s", + help="Specific the neural network model to use (default: %(default)s)") + parser.add_argument('--batch-size', type=int, default=None, metavar='N', + help="Batch size for evaluation (default: %(default)s)") + parser.add_argument('--input-bitwidth', type=int, default=None, + help="Bitwidth to use at the input (default: %(default)s)") + parser.add_argument('--hidden-bitwidth', type=int, default=None, + help="Bitwidth to use for activations in hidden layers (default: %(default)s)") + parser.add_argument('--output-bitwidth', type=int, default=None, + help="Bitwidth to use at the output (default: %(default)s)") + parser.add_argument('--input-fanin', type=int, default=None, + help="Fanin to use at the input (default: %(default)s)") + parser.add_argument('--hidden-fanin', type=int, default=None, + help="Fanin to use for the hidden layers (default: %(default)s)") + parser.add_argument('--output-fanin', type=int, default=None, + help="Fanin to use at the output (default: %(default)s)") + parser.add_argument('--hidden-layers', nargs='+', type=int, default=None, + help="A list of hidden layer neuron sizes (default: %(default)s)") + parser.add_argument('--dataset-file', type=str, default='data/unsw_nb15_binarized.npz', + help="The file to use as the dataset input (default: 
%(default)s)") + parser.add_argument('--log-dir', type=str, default='./log', + help="A location to store the calculated histograms (default: %(default)s)") + parser.add_argument('--checkpoint', type=str, required=True, + help="The checkpoint file which contains the model weights") + args = parser.parse_args() + defaults = configs[args.arch] + options = vars(args) + del options['arch'] + config = {} + for k in options.keys(): + config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified. + + if not os.path.exists(config['log_dir']): + os.makedirs(config['log_dir']) + + # Split up configuration options to be more understandable + model_cfg = {} + for k in model_config.keys(): + model_cfg[k] = config[k] + dataset_cfg = {} + for k in dataset_config.keys(): + dataset_cfg[k] = config[k] + options_cfg = {} + for k in other_options.keys(): + if k == 'cuda': + continue + options_cfg[k] = config[k] + + # Fetch the test set + dataset = {} + dataset['train'] = get_preqnt_dataset(dataset_cfg['dataset_file'], split='train') + train_loader = DataLoader(dataset["train"], batch_size=config['batch_size'], shuffle=False) + + # Instantiate the PyTorch model + x, y = dataset['train'][0] + dataset_length = len(dataset['train']) + model_cfg['input_length'] = len(x) + model_cfg['output_length'] = 1 + model = UnswNb15NeqModel(model_cfg) + + # Load the model weights + checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu') + model.load_state_dict(checkpoint['model_dict']) + + # Test the PyTorch model + print("Running inference of baseline model on training set (%d examples)..." 
% (dataset_length)) + model.eval() + baseline_accuracy = test(model, train_loader, cuda=False) + print("Baseline accuracy: %f" % (baseline_accuracy)) + + # Instantiate LUT-based model + lut_model = UnswNb15LutModel(model_cfg) + lut_model.load_state_dict(checkpoint['model_dict']) + + # Generate the truth tables in the LUT module + print("Converting to NEQs to LUTs...") + generate_truth_tables(lut_model, verbose=True) + + # Test the LUT-based model + print("Running inference of LUT-based model training set (%d examples)..." % (dataset_length)) + lut_inference(lut_model, track_used_luts=True) + lut_model.eval() + lut_accuracy = test(lut_model, train_loader, cuda=False) + print("LUT-Based Model accuracy: %f" % (lut_accuracy)) + print("Saving LUTs to %s... " % (options_cfg["log_dir"] + "/luts.pth")) + save_luts(lut_model, options_cfg["log_dir"] + "/luts.pth") + print("Done!") + diff --git a/examples/cybersecurity/models.py b/examples/cybersecurity/models.py index b98ab5dc9..bfbaf2ca5 100644 --- a/examples/cybersecurity/models.py +++ b/examples/cybersecurity/models.py @@ -63,13 +63,15 @@ def __init__(self, model_config): self.verilog_dir = None self.top_module_filename = None self.dut = None + self.verify = True self.logfile = None - def verilog_inference(self, verilog_dir, top_module_filename, logfile: bool = False, add_registers: bool = False): + def verilog_inference(self, verilog_dir, top_module_filename, logfile: bool = False, add_registers: bool = False, verify: bool = True): self.verilog_dir = realpath(verilog_dir) self.top_module_filename = top_module_filename - self.dut = PyVerilator.build(f"{self.verilog_dir}/{self.top_module_filename}", verilog_path=[self.verilog_dir], build_dir=f"{self.verilog_dir}/verilator") + self.dut = PyVerilator.build(f"{self.verilog_dir}/{self.top_module_filename}", verilog_path=[self.verilog_dir], build_dir=f"{self.verilog_dir}/verilator", command_args=("--x-assign","0",)) self.is_verilog_inference = True + self.verify = verify 
self.logfile = logfile if add_registers: self.latency = len(self.num_neurons) @@ -95,11 +97,8 @@ def verilog_forward(self, x): self.dut.io.clk = 0 for i in range(x.shape[0]): x_i = x[i,:] - y_i = self.pytorch_forward(x[i:i+1,:])[0] xv_i = list(map(lambda z: input_quant.get_bin_str(z), x_i)) - ys_i = list(map(lambda z: output_quant.get_bin_str(z), y_i)) xvc_i = reduce(lambda a,b: a+b, xv_i[::-1]) - ysc_i = reduce(lambda a,b: a+b, ys_i[::-1]) self.dut["M0"] = int(xvc_i, 2) for j in range(self.latency + 1): #print(self.dut.io.M5) @@ -107,9 +106,13 @@ def verilog_forward(self, x): result = f"{res:0{int(total_output_bits)}b}" self.dut.io.clk = 1 self.dut.io.clk = 0 - expected = f"{int(ysc_i,2):0{int(total_output_bits)}b}" result = f"{res:0{int(total_output_bits)}b}" - assert(expected == result) + if self.verify: + y_i = self.pytorch_forward(x[i:i+1,:])[0] + ys_i = list(map(lambda z: output_quant.get_bin_str(z), y_i)) + ysc_i = reduce(lambda a,b: a+b, ys_i[::-1]) + expected = f"{int(ysc_i,2):0{int(total_output_bits)}b}" + assert(expected == result) res_split = [result[i:i+output_bitwidth] for i in range(0, len(result), output_bitwidth)][::-1] yv_i = torch.Tensor(list(map(lambda z: int(z, 2), res_split))) y[i,:] = yv_i diff --git a/examples/cybersecurity/neq2lut.py b/examples/cybersecurity/neq2lut.py index ff4a6b6de..8dd8939fb 100644 --- a/examples/cybersecurity/neq2lut.py +++ b/examples/cybersecurity/neq2lut.py @@ -20,7 +20,8 @@ from logicnets.nn import generate_truth_tables, \ lut_inference, \ - module_list_to_verilog_module + module_list_to_verilog_module, \ + load_histograms from logicnets.synthesis import synthesize_and_get_resource_counts from logicnets.util import proc_postsynth_file @@ -34,6 +35,8 @@ "checkpoint": None, "generate_bench": False, "add_registers": False, + "histograms": None, + "freq_thresh": None, "simulate_pre_synthesis_verilog": False, "simulate_post_synthesis_verilog": False, } @@ -68,6 +71,10 @@ help="A location to store the log output of the 
training run and the output model (default: %(default)s)") parser.add_argument('--checkpoint', type=str, required=True, help="The checkpoint file which contains the model weights") + parser.add_argument('--histograms', type=str, default=None, + help="The checkpoint histograms of LUT usage (default: %(default)s)") + parser.add_argument('--freq-thresh', type=int, default=None, + help="Threshold to use to include this truth table into the model (default: %(default)s)") parser.add_argument('--generate-bench', action='store_true', default=False, help="Generate the truth table in BENCH format as well as verilog (default: %(default)s)") parser.add_argument('--dump-io', action='store_true', default=False, @@ -143,9 +150,12 @@ 'test_accuracy': lut_accuracy} torch.save(modelSave, options_cfg["log_dir"] + "/lut_based_model.pth") + if options_cfg["histograms"] is not None: + luts = torch.load(options_cfg["histograms"]) + load_histograms(lut_model, luts) print("Generating verilog in %s..." % (options_cfg["log_dir"])) - module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=options_cfg["generate_bench"], add_registers=options_cfg["add_registers"]) + module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=options_cfg["generate_bench"], add_registers=options_cfg["add_registers"], freq_thresh=options_cfg["freq_thresh"]) print("Top level entity stored at: %s/logicnet.v ..." 
% (options_cfg["log_dir"])) if args.dump_io: @@ -156,9 +166,10 @@ else: io_filename = None + if args.simulate_pre_synthesis_verilog: print("Running inference simulation of Verilog-based model...") - lut_model.verilog_inference(options_cfg["log_dir"], "logicnet.v", logfile=io_filename, add_registers=options_cfg["add_registers"]) + lut_model.verilog_inference(options_cfg["log_dir"], "logicnet.v", logfile=io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0) verilog_accuracy = test(lut_model, test_loader, cuda=False) print("Verilog-Based Model accuracy: %f" % (verilog_accuracy)) @@ -168,7 +179,7 @@ if args.simulate_post_synthesis_verilog: print("Running post-synthesis inference simulation of Verilog-based model...") proc_postsynth_file(options_cfg["log_dir"]) - lut_model.verilog_inference(options_cfg["log_dir"]+"/post_synth", "logicnet_post_synth.v", io_filename, add_registers=options_cfg["add_registers"]) + lut_model.verilog_inference(options_cfg["log_dir"]+"/post_synth", "logicnet_post_synth.v", io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0) post_synth_accuracy = test(lut_model, test_loader, cuda=False) print("Post-synthesis Verilog-Based Model accuracy: %f" % (post_synth_accuracy)) diff --git a/examples/cybersecurity/train.py b/examples/cybersecurity/train.py index f12ddc2dc..bb57b83e2 100644 --- a/examples/cybersecurity/train.py +++ b/examples/cybersecurity/train.py @@ -44,6 +44,8 @@ "learning_rate": 1e-1, "seed": 109, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, "nid-s-comp": { "hidden_layers": [49, 7], @@ -59,6 +61,8 @@ "learning_rate": 1e-1, "seed": 81, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, "nid-m": { "hidden_layers": [593, 256, 128, 128], @@ -74,6 +78,8 @@ "learning_rate": 1e-1, "seed": 196, "checkpoint": None, + "histograms": None, + 
"freq_thresh": None, }, "nid-m-comp": { "hidden_layers": [593, 256, 49, 7], @@ -89,6 +95,8 @@ "learning_rate": 1e-1, "seed": 40, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, "nid-l": { "hidden_layers": [593, 100, 100, 100], @@ -104,6 +112,8 @@ "learning_rate": 1e-1, "seed": 2, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, "nid-l-comp": { "hidden_layers": [593, 100, 25, 5], @@ -119,6 +129,8 @@ "learning_rate": 1e-1, "seed": 83, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, } diff --git a/examples/jet_substructure/README.md b/examples/jet_substructure/README.md index 42d216409..15f2f1550 100644 --- a/examples/jet_substructure/README.md +++ b/examples/jet_substructure/README.md @@ -72,3 +72,32 @@ our paper below: } ``` +## Testing BLIF Files on the JSC Dataset + +In this section, we show how to take technology-mapped BLIF files, +generate technology-mapped verilog and simulate the verilog on the JSC dataset. + +### Convert BLIF Files into Verilog + +To convert the full BLIF files (as generated from the LogicNets examples, via `neq2lut_abc.py`) into verilog, run the following: + +```bash +python blif2verilog.py --arch <arch> --input-blifs <blif_dir>/layers_full_opt.blif --output-directory <output_dir> +``` + +To convert the layer-wise BLIF files into verilog, run the following: + +```bash +python blif2verilog.py --arch <arch> --input-blifs <blif_dir>/*.blif --output-directory <output_dir> --generated-module-name-prefix layer0 +``` + +Note, the generated module name prefix will likely have to change if the source files are handled in a different way. 
+ +### Simulate Verilog + +The resultant verilog can be simulated as follows: + +```bash +python simulate_verilog.py --arch <arch> --checkpoint <checkpoint> --input-verilog <output_dir>/logicnet.v +``` + diff --git a/examples/jet_substructure/blif2verilog.py b/examples/jet_substructure/blif2verilog.py new file mode 100644 index 000000000..929de9610 --- /dev/null +++ b/examples/jet_substructure/blif2verilog.py @@ -0,0 +1,141 @@ +# Copyright (C) 2022 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A script to convert technology-mapped BLIF files to technology-mapped verilog. 
+ +import os +import glob +import shutil +from argparse import ArgumentParser + +import torch +from torch.utils.data import DataLoader + +from logicnets.abc import tech_map_to_verilog,\ + putontop_blif,\ + pipeline_tech_mapped_circuit +from logicnets.verilog import fix_abc_module_name,\ + generate_abc_verilog_wrapper + +from train import configs, model_config, dataset_config, test +from models import JetSubstructureNeqModel +from dataset import JetSubstructureDataset + +other_options = { + "output_directory": None, + "input_blifs": None, + "num_registers": None, + "generated_module_name_prefix": None, +} + +if __name__ == "__main__": + parser = ArgumentParser(description="Convert a technology-mapped BLIF files into a technology-mapped verilog file, using ABC") + parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s", + help="Specific the neural network model to use (default: %(default)s)") + parser.add_argument('--input-bitwidth', type=int, default=None, + help="Bitwidth to use at the input (default: %(default)s)") + parser.add_argument('--hidden-bitwidth', type=int, default=None, + help="Bitwidth to use for activations in hidden layers (default: %(default)s)") + parser.add_argument('--output-bitwidth', type=int, default=None, + help="Bitwidth to use at the output (default: %(default)s)") + parser.add_argument('--input-fanin', type=int, default=None, + help="Fanin to use at the input (default: %(default)s)") + parser.add_argument('--hidden-fanin', type=int, default=None, + help="Fanin to use for the hidden layers (default: %(default)s)") + parser.add_argument('--output-fanin', type=int, default=None, + help="Fanin to use at the output (default: %(default)s)") + parser.add_argument('--hidden-layers', nargs='+', type=int, default=None, + help="A list of hidden layer neuron sizes (default: %(default)s)") + parser.add_argument('--dataset-file', type=str, default='data/processed-pythia82-lhc13-all-pt1-50k-r1_h022_e0175_t220_nonu_truth.z', + 
help="The file to use as the dataset input (default: %(default)s)") + parser.add_argument('--dataset-config', type=str, default='config/yaml_IP_OP_config.yml', + help="The file to use to configure the input dataset (default: %(default)s)") + parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'], + help="Dataset to use for evaluation (default: %(default)s)") + parser.add_argument('--input-blifs', nargs='+', type=str, required=True, + help="The input BLIF files") + parser.add_argument('--output-directory', type=str, default='./log', + help="The directory which the generated verilog gets stored. (default: %(default)s)") + parser.add_argument('--num-registers', type=int, default=0, + help="The number of registers to add to the generated verilog (default: %(default)s)") + parser.add_argument('--generated-module-name-prefix', type=str, default='\\aig', + help="A prefix which matches the module name in the generated verilog, but no other line of code (default: %(default)s)") + args = parser.parse_args() + defaults = configs[args.arch] + options = vars(args) + del options['arch'] + config = {} + for k in options.keys(): + config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified. 
+ + if not os.path.exists(args.output_directory): + os.makedirs(args.output_directory) + + # Split up configuration options to be more understandable + model_cfg = {} + for k in model_config.keys(): + model_cfg[k] = config[k] + dataset_cfg = {} + for k in dataset_config.keys(): + dataset_cfg[k] = config[k] + options_cfg = {} + for k in other_options.keys(): + options_cfg[k] = config[k] + + # Fetch the test set + dataset = {} + dataset[args.dataset_split] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split=args.dataset_split) + test_loader = DataLoader(dataset[args.dataset_split], batch_size=1, shuffle=False) + + # Instantiate the PyTorch model + x, y = dataset[args.dataset_split][0] + model_cfg['input_length'] = len(x) + model_cfg['output_length'] = len(y) + model = JetSubstructureNeqModel(model_cfg) + + abc_project_root = options_cfg['output_directory'] + veropt_dir = options_cfg['output_directory'] + input_blif = "layers_full_opt.blif" + verbose = False + + if len(options_cfg['input_blifs']) > 1: + nodes, out, err = putontop_blif([os.path.realpath(blif) for blif in options_cfg['input_blifs']], input_blif, working_dir=abc_project_root, verbose=verbose) + else: + shutil.copy(os.path.realpath(options_cfg['input_blifs'][0]), f"{abc_project_root}/{input_blif}") + + if options_cfg['num_registers'] == 0: + nodes, out, err = tech_map_to_verilog(circuit_file=input_blif, output_verilog=f"layers_full_opt.v", working_dir=abc_project_root, verbose=verbose) + else: + nodes, out, err = pipeline_tech_mapped_circuit(circuit_file=input_blif, output_verilog=f"layers_full_opt.v", num_registers=options_cfg['num_registers'], working_dir=abc_project_root, verbose=verbose) + + # Fix the resultant verilog file so that it can be simulated + fix_abc_module_name(f"{veropt_dir}/layers_full_opt.v", f"{veropt_dir}/layers_full_opt.v", options_cfg["generated_module_name_prefix"], "layers_full_opt", add_timescale=options_cfg['num_registers'] != 0) + + # 
Generate top-level entity wrapper + module_list = model.module_list + _, input_bitwidth = module_list[0].input_quant.get_scale_factor_bits() + _, output_bitwidth = module_list[-1].output_quant.get_scale_factor_bits() + input_bitwidth, output_bitwidth = int(input_bitwidth), int(output_bitwidth) + total_input_bits = module_list[0].in_features*input_bitwidth + total_output_bits = module_list[-1].out_features*output_bitwidth + module_name="logicnet" + veropt_wrapper_str = generate_abc_verilog_wrapper(module_name=module_name, input_name="M0", input_bits=total_input_bits, output_name=f"M{len(module_list)}", output_bits=total_output_bits, submodule_name="layers_full_opt", num_registers=options_cfg['num_registers']) + with open(f"{veropt_dir}/{module_name}.v", "w") as f: + f.write(veropt_wrapper_str) + + print(f"Adding Nitro-Parts-Lib to {veropt_dir}") + source_files = glob.glob(f"{os.environ['NITROPARTSLIB']}/*.v") + for f in source_files: + shutil.copy(f, f"{veropt_dir}") + diff --git a/examples/jet_substructure/config/yaml_IP_OP_config.yml b/examples/jet_substructure/config/yaml_IP_OP_config.yml index e238039bf..95befe1fe 100644 --- a/examples/jet_substructure/config/yaml_IP_OP_config.yml +++ b/examples/jet_substructure/config/yaml_IP_OP_config.yml @@ -45,5 +45,5 @@ L1Reg: 0.0001 NormalizeInputs: 1 InputType: Dense ApplyPca: false -PcaDimensions: 10 +PcaDimensions: 12 diff --git a/examples/jet_substructure/dump_luts.py b/examples/jet_substructure/dump_luts.py new file mode 100644 index 000000000..c5acfb8b2 --- /dev/null +++ b/examples/jet_substructure/dump_luts.py @@ -0,0 +1,124 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from argparse import ArgumentParser + +import torch +from torch.utils.data import DataLoader + +from logicnets.nn import generate_truth_tables, \ + lut_inference, \ + save_luts, \ + module_list_to_verilog_module + +from train import configs, model_config, dataset_config, other_options, test +from dataset import JetSubstructureDataset +from models import JetSubstructureNeqModel, JetSubstructureLutModel +from logicnets.synthesis import synthesize_and_get_resource_counts + +if __name__ == "__main__": + parser = ArgumentParser(description="Generate histograms of states used throughout LogicNets") + parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s", + help="Specific the neural network model to use (default: %(default)s)") + parser.add_argument('--batch-size', type=int, default=None, metavar='N', + help="Batch size for evaluation (default: %(default)s)") + parser.add_argument('--input-bitwidth', type=int, default=None, + help="Bitwidth to use at the input (default: %(default)s)") + parser.add_argument('--hidden-bitwidth', type=int, default=None, + help="Bitwidth to use for activations in hidden layers (default: %(default)s)") + parser.add_argument('--output-bitwidth', type=int, default=None, + help="Bitwidth to use at the output (default: %(default)s)") + parser.add_argument('--input-fanin', type=int, default=None, + help="Fanin to use at the input (default: %(default)s)") + parser.add_argument('--hidden-fanin', type=int, default=None, + help="Fanin to use for the hidden layers (default: %(default)s)") + 
parser.add_argument('--output-fanin', type=int, default=None, + help="Fanin to use at the output (default: %(default)s)") + parser.add_argument('--hidden-layers', nargs='+', type=int, default=None, + help="A list of hidden layer neuron sizes (default: %(default)s)") + parser.add_argument('--dataset-file', type=str, default='data/processed-pythia82-lhc13-all-pt1-50k-r1_h022_e0175_t220_nonu_truth.z', + help="The file to use as the dataset input (default: %(default)s)") + parser.add_argument('--dataset-config', type=str, default='config/yaml_IP_OP_config.yml', + help="The file to use to configure the input dataset (default: %(default)s)") + parser.add_argument('--log-dir', type=str, default='./log', + help="A location to store the calculated histograms (default: %(default)s)") + parser.add_argument('--checkpoint', type=str, required=True, + help="The checkpoint file which contains the model weights") + args = parser.parse_args() + defaults = configs[args.arch] + options = vars(args) + del options['arch'] + config = {} + for k in options.keys(): + config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified. 
+ + if not os.path.exists(config['log_dir']): + os.makedirs(config['log_dir']) + + # Split up configuration options to be more understandable + model_cfg = {} + for k in model_config.keys(): + model_cfg[k] = config[k] + dataset_cfg = {} + for k in dataset_config.keys(): + dataset_cfg[k] = config[k] + options_cfg = {} + for k in other_options.keys(): + if k == 'cuda': + continue + options_cfg[k] = config[k] + + # Fetch the test set + dataset = {} + dataset['train'] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split="train") + train_loader = DataLoader(dataset["train"], batch_size=config['batch_size'], shuffle=False) + + # Instantiate the PyTorch model + x, y = dataset['train'][0] + dataset_length = len(dataset['train']) + model_cfg['input_length'] = len(x) + model_cfg['output_length'] = len(y) + model = JetSubstructureNeqModel(model_cfg) + + # Load the model weights + checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu') + model.load_state_dict(checkpoint['model_dict']) + + # Test the PyTorch model + print("Running inference of baseline model on training set (%d examples)..." % (dataset_length)) + model.eval() + baseline_accuracy, baseline_avg_roc_auc = test(model, train_loader, cuda=False) + print("Baseline accuracy: %f" % (baseline_accuracy)) + print("Baseline AVG ROC AUC: %f" % (baseline_avg_roc_auc)) + + # Instantiate LUT-based model + lut_model = JetSubstructureLutModel(model_cfg) + lut_model.load_state_dict(checkpoint['model_dict']) + + # Generate the truth tables in the LUT module + print("Converting to NEQs to LUTs...") + generate_truth_tables(lut_model, verbose=True) + + # Test the LUT-based model + print("Running inference of LUT-based model training set (%d examples)..." 
% (dataset_length)) + lut_inference(lut_model, track_used_luts=True) + lut_model.eval() + lut_accuracy, lut_avg_roc_auc = test(lut_model, train_loader, cuda=False) + print("LUT-Based Model accuracy: %f" % (lut_accuracy)) + print("LUT-Based AVG ROC AUC: %f" % (lut_avg_roc_auc)) + print("Saving LUTs to %s... " % (options_cfg["log_dir"] + "/luts.pth")) + save_luts(lut_model, options_cfg["log_dir"] + "/luts.pth") + print("Done!") + diff --git a/examples/jet_substructure/models.py b/examples/jet_substructure/models.py index 4d35a86f8..806f485d6 100644 --- a/examples/jet_substructure/models.py +++ b/examples/jet_substructure/models.py @@ -65,13 +65,15 @@ def __init__(self, model_config): self.verilog_dir = None self.top_module_filename = None self.dut = None + self.verify = True self.logfile = None - def verilog_inference(self, verilog_dir, top_module_filename, logfile: bool = False, add_registers: bool = False): + def verilog_inference(self, verilog_dir, top_module_filename, logfile: bool = False, add_registers: bool = False, verify: bool = True): self.verilog_dir = realpath(verilog_dir) self.top_module_filename = top_module_filename - self.dut = PyVerilator.build(f"{self.verilog_dir}/{self.top_module_filename}", verilog_path=[self.verilog_dir], build_dir=f"{self.verilog_dir}/verilator") + self.dut = PyVerilator.build(f"{self.verilog_dir}/{self.top_module_filename}", verilog_path=[self.verilog_dir], build_dir=f"{self.verilog_dir}/verilator", command_args=("--x-assign","0",)) self.is_verilog_inference = True + self.verify = verify self.logfile = logfile if add_registers: self.latency = len(self.num_neurons) @@ -97,11 +99,8 @@ def verilog_forward(self, x): self.dut.io.clk = 0 for i in range(x.shape[0]): x_i = x[i,:] - y_i = self.pytorch_forward(x[i:i+1,:])[0] xv_i = list(map(lambda z: input_quant.get_bin_str(z), x_i)) - ys_i = list(map(lambda z: output_quant.get_bin_str(z), y_i)) xvc_i = reduce(lambda a,b: a+b, xv_i[::-1]) - ysc_i = reduce(lambda a,b: a+b, ys_i[::-1]) 
self.dut["M0"] = int(xvc_i, 2) for j in range(self.latency + 1): #print(self.dut.io.M5) @@ -109,9 +108,13 @@ def verilog_forward(self, x): result = f"{res:0{int(total_output_bits)}b}" self.dut.io.clk = 1 self.dut.io.clk = 0 - expected = f"{int(ysc_i,2):0{int(total_output_bits)}b}" result = f"{res:0{int(total_output_bits)}b}" - assert(expected == result) + if self.verify: + y_i = self.pytorch_forward(x[i:i+1,:])[0] + ys_i = list(map(lambda z: output_quant.get_bin_str(z), y_i)) + ysc_i = reduce(lambda a,b: a+b, ys_i[::-1]) + expected = f"{int(ysc_i,2):0{int(total_output_bits)}b}" + assert(expected == result) res_split = [result[i:i+output_bitwidth] for i in range(0, len(result), output_bitwidth)][::-1] yv_i = torch.Tensor(list(map(lambda z: int(z, 2), res_split))) y[i,:] = yv_i diff --git a/examples/jet_substructure/neq2lut.py b/examples/jet_substructure/neq2lut.py index 87666852d..f2e363c06 100644 --- a/examples/jet_substructure/neq2lut.py +++ b/examples/jet_substructure/neq2lut.py @@ -20,7 +20,8 @@ from logicnets.nn import generate_truth_tables, \ lut_inference, \ - module_list_to_verilog_module + module_list_to_verilog_module, \ + load_histograms from train import configs, model_config, dataset_config, test from dataset import JetSubstructureDataset @@ -34,6 +35,8 @@ "checkpoint": None, "generate_bench": False, "add_registers": False, + "histograms": None, + "freq_thresh": None, "simulate_pre_synthesis_verilog": False, "simulate_post_synthesis_verilog": False, } @@ -70,6 +73,10 @@ help="A location to store the log output of the training run and the output model (default: %(default)s)") parser.add_argument('--checkpoint', type=str, required=True, help="The checkpoint file which contains the model weights") + parser.add_argument('--histograms', type=str, default=None, + help="The checkpoint histograms of LUT usage (default: %(default)s)") + parser.add_argument('--freq-thresh', type=int, default=None, + help="Threshold to use to include this truth table into the 
model (default: %(default)s)") parser.add_argument('--generate-bench', action='store_true', default=False, help="Generate the truth table in BENCH format as well as verilog (default: %(default)s)") parser.add_argument('--dump-io', action='store_true', default=False, @@ -148,9 +155,12 @@ 'test_avg_roc_auc': lut_avg_roc_auc} torch.save(modelSave, options_cfg["log_dir"] + "/lut_based_model.pth") + if options_cfg["histograms"] is not None: + luts = torch.load(options_cfg["histograms"]) + load_histograms(lut_model, luts) print("Generating verilog in %s..." % (options_cfg["log_dir"])) - module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=options_cfg["generate_bench"], add_registers=options_cfg["add_registers"]) + module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=options_cfg["generate_bench"], add_registers=options_cfg["add_registers"], freq_thresh=options_cfg["freq_thresh"]) print("Top level entity stored at: %s/logicnet.v ..." 
% (options_cfg["log_dir"])) if args.dump_io: @@ -163,7 +173,7 @@ if args.simulate_pre_synthesis_verilog: print("Running inference simulation of Verilog-based model...") - lut_model.verilog_inference(options_cfg["log_dir"], "logicnet.v", logfile=io_filename, add_registers=options_cfg["add_registers"]) + lut_model.verilog_inference(options_cfg["log_dir"], "logicnet.v", logfile=io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0) verilog_accuracy, verilog_avg_roc_auc = test(lut_model, test_loader, cuda=False) print("Verilog-Based Model accuracy: %f" % (verilog_accuracy)) print("Verilog-Based AVG ROC AUC: %f" % (verilog_avg_roc_auc)) @@ -174,7 +184,7 @@ if args.simulate_post_synthesis_verilog: print("Running post-synthesis inference simulation of Verilog-based model...") proc_postsynth_file(options_cfg["log_dir"]) - lut_model.verilog_inference(options_cfg["log_dir"]+"/post_synth", "logicnet_post_synth.v", io_filename, add_registers=options_cfg["add_registers"]) + lut_model.verilog_inference(options_cfg["log_dir"]+"/post_synth", "logicnet_post_synth.v", io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0) post_synth_accuracy, post_synth_avg_roc_auc = test(lut_model, test_loader, cuda=False) print("Post-synthesis Verilog-Based Model accuracy: %f" % (post_synth_accuracy)) print("Post-synthesis Verilog-Based AVG ROC AUC: %f" % (post_synth_avg_roc_auc)) diff --git a/examples/jet_substructure/neq2lut_abc.py b/examples/jet_substructure/neq2lut_abc.py new file mode 100644 index 000000000..1d2bd7495 --- /dev/null +++ b/examples/jet_substructure/neq2lut_abc.py @@ -0,0 +1,171 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from argparse import ArgumentParser + +import torch +from torch.utils.data import DataLoader + +from logicnets.nn import generate_truth_tables, \ + lut_inference, \ + module_list_to_verilog_module, \ + load_histograms +from logicnets.synthesis import synthesize_and_get_resource_counts_with_abc + +from train import configs, model_config, dataset_config, test +from dataset import JetSubstructureDataset +from models import JetSubstructureNeqModel, JetSubstructureLutModel +from dataset_dump import dump_io + +other_options = { + "cuda": None, + "log_dir": None, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, +} + +if __name__ == "__main__": + parser = ArgumentParser(description="Synthesize convert a PyTorch trained model into verilog using ABC") + parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s", + help="Specific the neural network model to use (default: %(default)s)") + parser.add_argument('--batch-size', type=int, default=None, metavar='N', + help="Batch size for evaluation (default: %(default)s)") + parser.add_argument('--input-bitwidth', type=int, default=None, + help="Bitwidth to use at the input (default: %(default)s)") + parser.add_argument('--hidden-bitwidth', type=int, default=None, + help="Bitwidth to use for activations in hidden layers (default: %(default)s)") + parser.add_argument('--output-bitwidth', type=int, default=None, + help="Bitwidth to use at the output (default: %(default)s)") + parser.add_argument('--input-fanin', type=int, default=None, + help="Fanin to use at the input 
(default: %(default)s)") + parser.add_argument('--hidden-fanin', type=int, default=None, + help="Fanin to use for the hidden layers (default: %(default)s)") + parser.add_argument('--output-fanin', type=int, default=None, + help="Fanin to use at the output (default: %(default)s)") + parser.add_argument('--hidden-layers', nargs='+', type=int, default=None, + help="A list of hidden layer neuron sizes (default: %(default)s)") + parser.add_argument('--dataset-file', type=str, default='data/processed-pythia82-lhc13-all-pt1-50k-r1_h022_e0175_t220_nonu_truth.z', + help="The file to use as the dataset input (default: %(default)s)") + parser.add_argument('--clock-period', type=float, default=1.0, + help="Target clock frequency to use during Vivado synthesis (default: %(default)s)") + parser.add_argument('--dataset-config', type=str, default='config/yaml_IP_OP_config.yml', + help="The file to use to configure the input dataset (default: %(default)s)") + parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'], + help="Dataset to use for evaluation (default: %(default)s)") + parser.add_argument('--log-dir', type=str, default='./log', + help="A location to store the log output of the training run and the output model (default: %(default)s)") + parser.add_argument('--checkpoint', type=str, required=True, + help="The checkpoint file which contains the model weights") + parser.add_argument('--histograms', type=str, default=None, + help="The checkpoint histograms of LUT usage (default: %(default)s)") + parser.add_argument('--freq-thresh', type=int, default=None, + help="Threshold to use to include this truth table into the model (default: %(default)s)") + parser.add_argument('--num-registers', type=int, default=0, + help="The number of registers to add to the generated verilog (default: %(default)s)") + args = parser.parse_args() + defaults = configs[args.arch] + options = vars(args) + del options['arch'] + config = {} + for k in options.keys(): + 
config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified. + + if not os.path.exists(config['log_dir']): + os.makedirs(config['log_dir']) + + # Split up configuration options to be more understandable + model_cfg = {} + for k in model_config.keys(): + model_cfg[k] = config[k] + dataset_cfg = {} + for k in dataset_config.keys(): + dataset_cfg[k] = config[k] + options_cfg = {} + for k in other_options.keys(): + if k == 'cuda': + continue + options_cfg[k] = config[k] + + # Fetch the test set + dataset = {} + dataset["train"] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split="train") + dataset["test"] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split="test") + train_loader = DataLoader(dataset["train"], batch_size=config['batch_size'], shuffle=False) + test_loader = DataLoader(dataset["test"], batch_size=config['batch_size'], shuffle=False) + + + # Instantiate the PyTorch model + x, y = dataset[args.dataset_split][0] + model_cfg['input_length'] = len(x) + model_cfg['output_length'] = len(y) + model = JetSubstructureNeqModel(model_cfg) + + # Load the model weights + checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu') + model.load_state_dict(checkpoint['model_dict']) + + # Test the PyTorch model + print("Running inference on baseline model...") + model.eval() + baseline_accuracy, baseline_avg_roc_auc = test(model, test_loader, cuda=False) + print("Baseline accuracy: %f" % (baseline_accuracy)) + print("Baseline AVG ROC AUC: %f" % (baseline_avg_roc_auc)) + + # Run preprocessing on training set. 
+ #train_input_file = config['log_dir'] + "/train_input.txt" + #train_output_file = config['log_dir'] + "/train_output.txt" + #test_input_file = config['log_dir'] + "/test_input.txt" + #test_output_file = config['log_dir'] + "/test_output.txt" + #print(f"Dumping train I/O to {train_input_file} and {train_output_file}") + #dump_io(model, train_loader, train_input_file, train_output_file) + #print(f"Dumping test I/O to {test_input_file} and {test_output_file}") + #dump_io(model, test_loader, test_input_file, test_output_file) + + # Instantiate LUT-based model + lut_model = JetSubstructureLutModel(model_cfg) + lut_model.load_state_dict(checkpoint['model_dict']) + + # Generate the truth tables in the LUT module + print("Converting to NEQs to LUTs...") + generate_truth_tables(lut_model, verbose=True) + + # Test the LUT-based model + print("Running inference on LUT-based model...") + lut_inference(lut_model) + lut_model.eval() + lut_accuracy, lut_avg_roc_auc = test(lut_model, test_loader, cuda=False) + print("LUT-Based Model accuracy: %f" % (lut_accuracy)) + print("LUT-Based AVG ROC AUC: %f" % (lut_avg_roc_auc)) + modelSave = { 'model_dict': lut_model.state_dict(), + 'test_accuracy': lut_accuracy, + 'test_avg_roc_auc': lut_avg_roc_auc} + + torch.save(modelSave, options_cfg["log_dir"] + "/lut_based_model.pth") + if options_cfg["histograms"] is not None: + luts = torch.load(options_cfg["histograms"]) + load_histograms(lut_model, luts) + + print("Generating verilog in %s..." % (options_cfg["log_dir"])) + module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=True, add_registers=False) + print("Top level entity stored at: %s/logicnet.v ..." 
% (options_cfg["log_dir"])) + + print("Running synthesis and verilog technology-mapped verilog in ABC") + train_accuracy, test_accuracy, nodes, average_care_set_size = synthesize_and_get_resource_counts_with_abc(options_cfg["log_dir"], lut_model.module_list, pipeline_stages=args.num_registers, freq_thresh=args.freq_thresh, train_input_txt="train_input.txt", train_output_txt="train_output.txt", test_input_txt="test_input.txt", test_output_txt="test_output.txt", bdd_opt_cmd="&ttopt", verbose=False) + print(f"Training set accuracy(%): {train_accuracy}") + print(f"Test set accuracy(%): {test_accuracy}") + print(f"LUT6(#): {nodes}") + print(f"Average care set sizes(%): {average_care_set_size}") + diff --git a/examples/jet_substructure/simulate_verilog.py b/examples/jet_substructure/simulate_verilog.py new file mode 100644 index 000000000..0aecf6f44 --- /dev/null +++ b/examples/jet_substructure/simulate_verilog.py @@ -0,0 +1,114 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from argparse import ArgumentParser + +import torch +from torch.utils.data import DataLoader + +from train import configs, model_config, dataset_config, test +from dataset import JetSubstructureDataset +from models import JetSubstructureNeqModel + +other_options = { + "checkpoint": None, + "input_verilog": None, + "num_registers": None, +} + +if __name__ == "__main__": + parser = ArgumentParser(description="Synthesize convert a PyTorch trained model into verilog") + parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s", + help="Specific the neural network model to use (default: %(default)s)") + parser.add_argument('--batch-size', type=int, default=None, metavar='N', + help="Batch size for evaluation (default: %(default)s)") + parser.add_argument('--input-bitwidth', type=int, default=None, + help="Bitwidth to use at the input (default: %(default)s)") + parser.add_argument('--hidden-bitwidth', type=int, default=None, + help="Bitwidth to use for activations in hidden layers (default: %(default)s)") + parser.add_argument('--output-bitwidth', type=int, default=None, + help="Bitwidth to use at the output (default: %(default)s)") + parser.add_argument('--input-fanin', type=int, default=None, + help="Fanin to use at the input (default: %(default)s)") + parser.add_argument('--hidden-fanin', type=int, default=None, + help="Fanin to use for the hidden layers (default: %(default)s)") + parser.add_argument('--output-fanin', type=int, default=None, + help="Fanin to use at the output (default: %(default)s)") + parser.add_argument('--hidden-layers', nargs='+', type=int, default=None, + help="A list of hidden layer neuron sizes (default: %(default)s)") + parser.add_argument('--dataset-file', type=str, default='data/processed-pythia82-lhc13-all-pt1-50k-r1_h022_e0175_t220_nonu_truth.z', + help="The file to use as the dataset input (default: %(default)s)") + parser.add_argument('--dataset-config', type=str, 
default='config/yaml_IP_OP_config.yml', + help="The file to use to configure the input dataset (default: %(default)s)") + parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'], + help="Dataset to use for evaluation (default: %(default)s)") + parser.add_argument('--checkpoint', type=str, required=True, + help="The checkpoint file which contains the model weights") + parser.add_argument('--input-verilog', type=str, required=True, + help="The input verilog file to simulate") + parser.add_argument('--num-registers', type=int, default=0, + help="The number of pipeline registers in the verilog (default: %(default)s)") + args = parser.parse_args() + defaults = configs[args.arch] + options = vars(args) + del options['arch'] + config = {} + for k in options.keys(): + config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified. + + # Split up configuration options to be more understandable + model_cfg = {} + for k in model_config.keys(): + model_cfg[k] = config[k] + dataset_cfg = {} + for k in dataset_config.keys(): + dataset_cfg[k] = config[k] + options_cfg = {} + for k in other_options.keys(): + if k == 'cuda': + continue + options_cfg[k] = config[k] + + # Fetch the test set + dataset = {} + dataset[args.dataset_split] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split=args.dataset_split) + test_loader = DataLoader(dataset[args.dataset_split], batch_size=config['batch_size'], shuffle=False) + + # Instantiate the PyTorch model + x, y = dataset[args.dataset_split][0] + model_cfg['input_length'] = len(x) + model_cfg['output_length'] = len(y) + model = JetSubstructureNeqModel(model_cfg) + + # Load the model weights + checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu') + model.load_state_dict(checkpoint['model_dict']) + + # Test the PyTorch model + print("Running inference on baseline model...") + model.eval() + baseline_accuracy, 
baseline_avg_roc_auc = test(model, test_loader, cuda=False) + print("Baseline accuracy: %f" % (baseline_accuracy)) + print("Baseline AVG ROC AUC: %f" % (baseline_avg_roc_auc)) + + verilog_dir = os.path.dirname(options_cfg["input_verilog"]) + filename = os.path.split(options_cfg["input_verilog"])[-1] + print(f"Running inference simulation of Verilog-based model ({filename})") + model.verilog_inference(verilog_dir, filename, logfile=None, add_registers=options_cfg["num_registers"] != 0, verify=False) + model.latency = options_cfg["num_registers"] + verilog_accuracy, verilog_avg_roc_auc = test(model, test_loader, cuda=False) + print("Verilog-Based Model accuracy: %f" % (verilog_accuracy)) + print("Verilog-Based AVG ROC AUC: %f" % (verilog_avg_roc_auc)) + diff --git a/examples/jet_substructure/train.py b/examples/jet_substructure/train.py index f9e3869b0..ea608d441 100644 --- a/examples/jet_substructure/train.py +++ b/examples/jet_substructure/train.py @@ -46,6 +46,8 @@ "learning_rate": 1e-3, "seed": 2, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, "jsc-m": { "hidden_layers": [64, 32, 32, 32], @@ -61,6 +63,8 @@ "learning_rate": 1e-3, "seed": 3, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, "jsc-l": { "hidden_layers": [32, 64, 192, 192, 16], @@ -76,6 +80,8 @@ "learning_rate": 1e-3, "seed": 16, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, } diff --git a/examples/jet_substructure/verilog2bitfile.py b/examples/jet_substructure/verilog2bitfile.py new file mode 100644 index 000000000..3dfb7b226 --- /dev/null +++ b/examples/jet_substructure/verilog2bitfile.py @@ -0,0 +1,28 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from argparse import ArgumentParser + +from logicnets.synthesis import synthesize_and_get_resource_counts + +if __name__ == "__main__": + parser = ArgumentParser(description="Synthesize convert a PyTorch trained model into verilog") + parser.add_argument('--log-dir', type=str, default='./log', required=True, + help="A location to store the log output of the training run and the output model (default: %(default)s)") + parser.add_argument('--clock-period', type=float, default=1.0, + help="Target clock frequency to use during Vivado synthesis (default: %(default)s)") + args = parser.parse_args() + print("Running out-of-context synthesis") + ret = synthesize_and_get_resource_counts(args.log_dir, "logicnet", fpga_part="xcu280-fsvh2892-2L-e", clk_period_ns=args.clock_period, post_synthesis = 1) + diff --git a/examples/mnist/README.md b/examples/mnist/README.md new file mode 100644 index 000000000..c157551fd --- /dev/null +++ b/examples/mnist/README.md @@ -0,0 +1,68 @@ +# LogicNets for MNIST Classification + +This example shows the accuracy that is attainable using the LogicNets methodology on the MNIST hand-written character classification task. + +## Prerequisites + +* LogicNets +* numpy +* torchvision + +## Installation + +If you're using the docker image, all the above prerequisites will be already installed. +Otherwise, you can install the above dependencies with pip and/or conda. + +## Download the Dataset + +The MNIST dataset will download automatically when the training script is first run. 
+You only need to make sure the necessary directory has been created: + +```bash +mkdir -p data +``` + +## Usage + +To train the \"MNIST-S\", \"MNIST-M\" and \"MNIST-L\" networks, +run the following: + +```bash +python train.py --arch --log-dir .// +``` + +To then generate verilog from this trained model, run the following: + +```bash +python neq2lut.py --arch --checkpoint .//best_accuracy.pth --log-dir .//verilog/ --add-registers +``` + +## Results + +Your results may vary slightly, depending on your system configuration. +The following results are attained when training on a CPU and synthesising with Vivado 2019.2: + +| Network Architecture | Test Accuracy (%) | LUTs | Flip Flops | Fmax (Mhz) | Latency (Cycles) | +| --------------------- | ----------------- | ----- | ------------- | ------------- | ----------------- | +| MNIST-S | | | | | | +| MNIST-M | | | | | | +| MNIST-L | | | | | | + +## Citation + +If you find this work useful for your research, please consider citing +our paper below: + +```bibtex +@inproceedings{umuroglu2020logicnets, + author = {Umuroglu, Yaman and Akhauri, Yash and Fraser, Nicholas J and Blott, Michaela}, + booktitle = {Proceedings of the International Conference on Field-Programmable Logic and Applications}, + title = {LogicNets: Co-Designed Neural Networks and Circuits for Extreme-Throughput Applications}, + year = {2020}, + pages = {291-297}, + publisher = {IEEE Computer Society}, + address = {Los Alamitos, CA, USA}, + month = {sep} +} +``` + diff --git a/examples/mnist/abc.patch b/examples/mnist/abc.patch new file mode 100644 index 000000000..8de1ebdbc --- /dev/null +++ b/examples/mnist/abc.patch @@ -0,0 +1,49 @@ +diff --git a/src/aig/gia/giaMinLut.c b/src/aig/gia/giaMinLut.c +index 5304486d..e08ebec5 100644 +--- a/src/aig/gia/giaMinLut.c ++++ b/src/aig/gia/giaMinLut.c +@@ -193,7 +193,7 @@ Gia_Man_t * Vec_WrdReadTest( char * pFileName ) + void Vec_WrdReadText( char * pFileName, Vec_Wrd_t ** pvSimI, Vec_Wrd_t ** pvSimO, int nIns, int 
nOuts ) + { + int i, nSize, iLine, nLines, nWords; +- char pLine[1000]; ++ char pLine[2000]; + Vec_Wrd_t * vSimI, * vSimO; + FILE * pFile = fopen( pFileName, "rb" ); + if ( pFile == NULL ) +@@ -214,7 +214,7 @@ void Vec_WrdReadText( char * pFileName, Vec_Wrd_t ** pvSimI, Vec_Wrd_t ** pvSimO + nWords = (nLines + 63)/64; + vSimI = Vec_WrdStart( nIns *nWords ); + vSimO = Vec_WrdStart( nOuts*nWords ); +- for ( iLine = 0; fgets( pLine, 1000, pFile ); iLine++ ) ++ for ( iLine = 0; fgets( pLine, 2000, pFile ); iLine++ ) + { + for ( i = 0; i < nIns; i++ ) + if ( pLine[nIns-1-i] == '1' ) +@@ -233,7 +233,7 @@ void Vec_WrdReadText( char * pFileName, Vec_Wrd_t ** pvSimI, Vec_Wrd_t ** pvSimO + int Vec_WrdReadText2( char * pFileName, Vec_Wrd_t ** pvSimI ) + { + int i, nSize, iLine, nLines, nWords, nIns; +- char pLine[1000]; ++ char pLine[2000]; + Vec_Wrd_t * vSimI; + FILE * pFile = fopen( pFileName, "rb" ); + if ( pFile == NULL ) +@@ -241,7 +241,7 @@ int Vec_WrdReadText2( char * pFileName, Vec_Wrd_t ** pvSimI ) + printf( "Cannot open file \"%s\" for reading.\n", pFileName ); + return 0; + } +- if ( !fgets(pLine, 1000, pFile) || (nIns = strlen(pLine)-1) < 1 ) ++ if ( !fgets(pLine, 2000, pFile) || (nIns = strlen(pLine)-1) < 1 ) + { + printf( "Cannot find the number of inputs in file \"%s\".\n", pFileName ); + fclose( pFile ); +@@ -259,7 +259,7 @@ int Vec_WrdReadText2( char * pFileName, Vec_Wrd_t ** pvSimI ) + nLines = nSize / (nIns + 1); + nWords = (nLines + 63)/64; + vSimI = Vec_WrdStart( nIns *nWords ); +- for ( iLine = 0; fgets( pLine, 1000, pFile ); iLine++ ) ++ for ( iLine = 0; fgets( pLine, 2000, pFile ); iLine++ ) + { + for ( i = 0; i < nIns; i++ ) + if ( pLine[nIns-1-i] == '1' ) diff --git a/examples/mnist/blif2verilog.py b/examples/mnist/blif2verilog.py new file mode 100644 index 000000000..1c5feda7d --- /dev/null +++ b/examples/mnist/blif2verilog.py @@ -0,0 +1,144 @@ +# Copyright (C) 2022 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# 
you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A script to convert technology-mapped BLIF files to technology mapped verilog. + +import os +import glob +import shutil +from argparse import ArgumentParser +from functools import partial + +import torch +from torch.utils.data import DataLoader +from torchvision.datasets import MNIST +from torchvision import transforms + +from logicnets.abc import tech_map_to_verilog,\ + putontop_blif,\ + pipeline_tech_mapped_circuit +from logicnets.verilog import fix_abc_module_name,\ + generate_abc_verilog_wrapper + +from models import MnistNeqModel +from train import configs, model_config, test + +other_options = { + "output_directory": None, + "input_blifs": None, + "num_registers": None, + "generated_module_name_prefix": None, +} + +if __name__ == "__main__": + parser = ArgumentParser(description="Convert a technology-mapped BLIF files into a technology-mapped verilog file, using ABC") + parser.add_argument('--arch', type=str, choices=configs.keys(), default="mnist-s", + help="Specific the neural network model to use (default: %(default)s)") + parser.add_argument('--input-bitwidth', type=int, default=None, + help="Bitwidth to use at the input (default: %(default)s)") + parser.add_argument('--hidden-bitwidth', type=int, default=None, + help="Bitwidth to use for activations in hidden layers (default: %(default)s)") + parser.add_argument('--output-bitwidth', type=int, default=None, + help="Bitwidth to use at the output (default: %(default)s)") + parser.add_argument('--input-fanin', type=int, 
default=None, + help="Fanin to use at the input (default: %(default)s)") + parser.add_argument('--hidden-fanin', type=int, default=None, + help="Fanin to use for the hidden layers (default: %(default)s)") + parser.add_argument('--output-fanin', type=int, default=None, + help="Fanin to use at the output (default: %(default)s)") + parser.add_argument('--hidden-layers', nargs='+', type=int, default=None, + help="A list of hidden layer neuron sizes (default: %(default)s)") + parser.add_argument('--input-dropout', type=float, default=None, + help="The amount of dropout to apply at the model input (default: %(default)s)") + parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'], + help="Dataset to use for evaluation (default: %(default)s)") + parser.add_argument('--input-blifs', nargs='+', type=str, required=True, + help="The input BLIF files") + parser.add_argument('--output-directory', type=str, default='./log', + help="The directory which the generated verilog gets stored. (default: %(default)s)") + parser.add_argument('--num-registers', type=int, default=0, + help="The number of registers to add to the generated verilog (default: %(default)s)") + parser.add_argument('--generated-module-name-prefix', type=str, default='\\aig', + help="A prefix which matches the module name in the generated verilog, but no other line of code (default: %(default)s)") + args = parser.parse_args() + defaults = configs[args.arch] + options = vars(args) + del options['arch'] + config = {} + for k in options.keys(): + config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified. 
+ + if not os.path.exists(args.output_directory): + os.makedirs(args.output_directory) + + # Split up configuration options to be more understandable + model_cfg = {} + for k in model_config.keys(): + model_cfg[k] = config[k] + options_cfg = {} + for k in other_options.keys(): + options_cfg[k] = config[k] + + trans = transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)), + transforms.Lambda(partial(torch.reshape, shape=(-1,))) + ]) + + # Fetch the test set + dataset = {} + dataset[args.dataset_split] = MNIST('./data', train=args.dataset_split == "train", download=True, transform=trans) + test_loader = DataLoader(dataset[args.dataset_split], batch_size=1, shuffle=False) + + # Instantiate the PyTorch model + x, y = dataset[args.dataset_split][0] + model_cfg['input_length'] = len(x) + model_cfg['output_length'] = 10 + model = MnistNeqModel(model_cfg) + + abc_project_root = options_cfg['output_directory'] + veropt_dir = options_cfg['output_directory'] + input_blif = "layers_full_opt.blif" + verbose = False + + if len(options_cfg['input_blifs']) > 1: + nodes, out, err = putontop_blif([os.path.realpath(blif) for blif in options_cfg['input_blifs']], input_blif, working_dir=abc_project_root, verbose=verbose) + else: + shutil.copy(os.path.realpath(options_cfg['input_blifs'][0]), f"{abc_project_root}/{input_blif}") + + if options_cfg['num_registers'] == 0: + nodes, out, err = tech_map_to_verilog(circuit_file=input_blif, output_verilog=f"layers_full_opt.v", working_dir=abc_project_root, verbose=verbose) + else: + nodes, out, err = pipeline_tech_mapped_circuit(circuit_file=input_blif, output_verilog=f"layers_full_opt.v", num_registers=options_cfg['num_registers'], working_dir=abc_project_root, verbose=verbose) + + # Fix the resultant verilog file so that it can be simulated + fix_abc_module_name(f"{veropt_dir}/layers_full_opt.v", f"{veropt_dir}/layers_full_opt.v", options_cfg["generated_module_name_prefix"], "layers_full_opt", 
add_timescale=True) + + # Generate top-level entity wrapper + module_list = model.module_list + _, input_bitwidth = module_list[0].input_quant.get_scale_factor_bits() + _, output_bitwidth = module_list[-1].output_quant.get_scale_factor_bits() + input_bitwidth, output_bitwidth = int(input_bitwidth), int(output_bitwidth) + total_input_bits = module_list[0].in_features*input_bitwidth + total_output_bits = module_list[-1].out_features*output_bitwidth + module_name="logicnet" + veropt_wrapper_str = generate_abc_verilog_wrapper(module_name=module_name, input_name="M0", input_bits=total_input_bits, output_name=f"M{len(module_list)}", output_bits=total_output_bits, submodule_name="layers_full_opt", num_registers=options_cfg['num_registers']) + with open(f"{veropt_dir}/{module_name}.v", "w") as f: + f.write(veropt_wrapper_str) + + print(f"Adding Nitro-Parts-Lib to {veropt_dir}") + source_files = glob.glob(f"{os.environ['NITROPARTSLIB']}/*.v") + for f in source_files: + shutil.copy(f, f"{veropt_dir}") + diff --git a/examples/mnist/dataset_dump.py b/examples/mnist/dataset_dump.py new file mode 100644 index 000000000..0e5963f6f --- /dev/null +++ b/examples/mnist/dataset_dump.py @@ -0,0 +1,133 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+from argparse import ArgumentParser
+from functools import reduce, partial
+
+import torch
+from torch.utils.data import DataLoader
+from torchvision.datasets import MNIST
+from torchvision import transforms
+
+from logicnets.nn import generate_truth_tables, \
+                         lut_inference, \
+                         module_list_to_verilog_module
+from logicnets.synthesis import synthesize_and_get_resource_counts
+
+from train import configs, model_config, other_options, test
+from models import MnistNeqModel, MnistLutModel
+
+def dump_io(model, data_loader, input_file, output_file):
+    """Write each quantized input as one zero-padded binary string per line, and its integer label per line."""
+    input_quant = model.module_list[0].input_quant
+    _, input_bitwidth = input_quant.get_scale_factor_bits()
+    total_input_bits = model.module_list[0].in_features * int(input_bitwidth)
+    input_quant.bin_output()
+    with open(input_file, 'w') as i_f, open(output_file, 'w') as o_f:
+        for data, target in data_loader:
+            x = input_quant(data)
+            for i in range(x.shape[0]):
+                # The per-feature bit strings are concatenated in reverse order, so
+                # feature 0 occupies the least-significant bits of the written word.
+                xv_i = [input_quant.get_bin_str(z) for z in x[i, :]]
+                xvc_i = "".join(xv_i[::-1])
+                i_f.write(f"{int(xvc_i,2):0{int(total_input_bits)}b}\n")
+                o_f.write(f"{int(target[i])}\n")
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Dump the train and test datasets (after input quantization) into text files")
+    parser.add_argument('--arch', type=str, choices=configs.keys(), default="mnist-s",
+        help="Specific the neural network model to use (default: %(default)s)")
+    parser.add_argument('--batch-size', type=int, default=None, metavar='N',
+        help="Batch size for evaluation (default: %(default)s)")
+    parser.add_argument('--input-bitwidth', type=int, default=None,
+        help="Bitwidth to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-bitwidth', type=int, default=None,
+        help="Bitwidth to use for activations in hidden layers (default: %(default)s)")
+    parser.add_argument('--output-bitwidth', type=int, default=None,
+        help="Bitwidth to use at the output (default: %(default)s)")
+    parser.add_argument('--input-fanin', type=int, default=None,
+        help="Fanin to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-fanin', type=int, default=None,
+        help="Fanin to use for the hidden layers (default: %(default)s)")
+    parser.add_argument('--output-fanin', type=int, default=None,
+        help="Fanin to use at the output (default: %(default)s)")
+    parser.add_argument('--hidden-layers', nargs='+', type=int, default=None,
+        help="A list of hidden layer neuron sizes (default: %(default)s)")
+    parser.add_argument('--input-dropout', type=float, default=None,
+        help="The amount of dropout to apply at the model input (default: %(default)s)")
+    parser.add_argument('--log-dir', type=str, default='./log',
+        help="A location to store the output I/O text files (default: %(default)s)")
+    parser.add_argument('--checkpoint', type=str, required=True,
+        help="The checkpoint file which contains the model weights")
+    args = parser.parse_args()
+    defaults = configs[args.arch]
+    options = vars(args)
+    del options['arch']
+    config = {}
+    for k in options.keys():
+        config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified.
+
+    # exist_ok avoids the race between an existence check and the creation.
+    os.makedirs(config['log_dir'], exist_ok=True)
+
+    # Split up configuration options to be more understandable
+    model_cfg = {}
+    for k in model_config.keys():
+        model_cfg[k] = config[k]
+    options_cfg = {}
+    for k in other_options.keys():
+        if k == 'cuda':
+            continue
+        options_cfg[k] = config[k]
+
+    trans = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize((0.1307,), (0.3081,)),
+            transforms.Lambda(partial(torch.reshape, shape=(-1,)))
+        ])
+
+    # Fetch the datasets
+    dataset = {}
+    dataset['train'] = MNIST('./data', train=True, download=True, transform=trans)
+    dataset['test'] = MNIST('./data', train=False, download=True, transform=trans)
+    train_loader = DataLoader(dataset["train"], batch_size=config['batch_size'], shuffle=False)
+    test_loader = DataLoader(dataset["test"], batch_size=config['batch_size'], shuffle=False)
+
+    # Instantiate the PyTorch model
+    x, _ = dataset["train"][0]
+    model_cfg['input_length'] = len(x)
+    model_cfg['output_length'] = 10
+    model = MnistNeqModel(model_cfg)
+
+    # Load the model weights
+    checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu')
+    model.load_state_dict(checkpoint['model_dict'])
+
+    # Test the PyTorch model
+    print("Running inference on baseline model...")
+    model.eval()
+    baseline_accuracy = test(model, test_loader, cuda=False)
+    print("Baseline accuracy: %f" % (baseline_accuracy))
+
+    # Dump the quantized train and test sets into the log directory.
+    train_input_file = os.path.join(config['log_dir'], "train_input.txt")
+    train_output_file = os.path.join(config['log_dir'], "train_output.txt")
+    test_input_file = os.path.join(config['log_dir'], "test_input.txt")
+    test_output_file = os.path.join(config['log_dir'], "test_output.txt")
+    print(f"Dumping train I/O to {train_input_file} and {train_output_file}")
+    dump_io(model, train_loader, train_input_file, train_output_file)
+    print(f"Dumping test I/O to {test_input_file} and {test_output_file}")
+    dump_io(model, test_loader, test_input_file, test_output_file)
diff --git a/examples/mnist/dump_luts.py b/examples/mnist/dump_luts.py
new file mode 100644
index 000000000..0b0604071
--- /dev/null
+++ b/examples/mnist/dump_luts.py
@@ -0,0 +1,125 @@
+# Copyright (C) 2021 Xilinx, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from argparse import ArgumentParser
+from functools import partial
+
+import torch
+from torch.utils.data import DataLoader
+from torchvision.datasets import MNIST
+from torchvision import transforms
+
+from logicnets.nn import generate_truth_tables, \
+                         lut_inference, \
+                         save_luts, \
+                         module_list_to_verilog_module
+
+from models import MnistNeqModel, MnistLutModel
+from train import configs, model_config, other_options, test
+from logicnets.synthesis import synthesize_and_get_resource_counts
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Generate histograms of states used throughout LogicNets")
+    parser.add_argument('--arch', type=str, choices=configs.keys(), default="mnist-s",
+        help="Specific the neural network model to use (default: %(default)s)")
+    parser.add_argument('--batch-size', type=int, default=None, metavar='N',
+        help="Batch size for evaluation (default: %(default)s)")
+    parser.add_argument('--input-bitwidth', type=int, default=None,
+        help="Bitwidth to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-bitwidth', type=int, default=None,
+        help="Bitwidth to use for activations in hidden layers (default: %(default)s)")
+    parser.add_argument('--output-bitwidth', type=int, default=None,
+        help="Bitwidth to use at the output (default: %(default)s)")
+    parser.add_argument('--input-fanin', type=int, default=None,
+        help="Fanin to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-fanin', type=int, default=None,
+        help="Fanin to use for the hidden layers (default: %(default)s)")
+    parser.add_argument('--output-fanin', type=int, default=None,
+        help="Fanin to use at the output (default: %(default)s)")
+    parser.add_argument('--hidden-layers', nargs='+', type=int, default=None,
+        help="A list of hidden layer neuron sizes (default: %(default)s)")
+    parser.add_argument('--input-dropout', type=float, default=None,
+        help="The amount of dropout to apply at the model input (default: %(default)s)")
+    parser.add_argument('--log-dir', type=str, default='./log',
+        help="A location to store the calculated histograms (default: %(default)s)")
+    parser.add_argument('--checkpoint', type=str, required=True,
+        help="The checkpoint file which contains the model weights")
+    args = parser.parse_args()
+    defaults = configs[args.arch]
+    options = vars(args)
+    del options['arch']
+    config = {}
+    for k in options.keys():
+        config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified.
+
+    # exist_ok avoids the race between an existence check and the creation.
+    os.makedirs(config['log_dir'], exist_ok=True)
+
+    # Split up configuration options to be more understandable
+    model_cfg = {}
+    for k in model_config.keys():
+        model_cfg[k] = config[k]
+    options_cfg = {}
+    for k in other_options.keys():
+        if k == 'cuda':
+            continue
+        options_cfg[k] = config[k]
+
+    trans = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize((0.1307,), (0.3081,)),
+            transforms.Lambda(partial(torch.reshape, shape=(-1,)))
+        ])
+
+    # Fetch the training set (LUT usage is tracked over the training examples)
+    dataset = {}
+    dataset["train"] = MNIST('./data', train=True, download=True, transform=trans)
+    train_loader = DataLoader(dataset["train"], batch_size=config['batch_size'], shuffle=False)
+
+    # Instantiate the PyTorch model
+    x, _ = dataset["train"][0]
+    dataset_length = len(dataset['train'])
+    model_cfg['input_length'] = len(x)
+    model_cfg['output_length'] = 10
+    model = MnistNeqModel(model_cfg)
+
+    # Load the model weights
+    checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu')
+    model.load_state_dict(checkpoint['model_dict'])
+
+    # Test the PyTorch model
+    print("Running inference of baseline model on training set (%d examples)..." % (dataset_length))
+    model.eval()
+    baseline_accuracy = test(model, train_loader, cuda=False)
+    print("Baseline accuracy: %f" % (baseline_accuracy))
+
+    # Instantiate LUT-based model
+    lut_model = MnistLutModel(model_cfg)
+    lut_model.load_state_dict(checkpoint['model_dict'])
+
+    # Generate the truth tables in the LUT module
+    print("Converting to NEQs to LUTs...")
+    generate_truth_tables(lut_model, verbose=True)
+
+    # Test the LUT-based model
+    print("Running inference of LUT-based model training set (%d examples)..." % (dataset_length))
+    lut_inference(lut_model, track_used_luts=True)
+    lut_model.eval()
+    lut_accuracy = test(lut_model, train_loader, cuda=False)
+    print("LUT-Based Model accuracy: %f" % (lut_accuracy))
+    print("Saving LUTs to %s... " % (os.path.join(options_cfg["log_dir"], "luts.pth")))
+    save_luts(lut_model, os.path.join(options_cfg["log_dir"], "luts.pth"))
+    print("Done!")
+
diff --git a/examples/mnist/models.py b/examples/mnist/models.py
new file mode 100644
index 000000000..1afeaf21e
--- /dev/null
+++ b/examples/mnist/models.py
@@ -0,0 +1,148 @@
+# Copyright (C) 2021 Xilinx, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import reduce
+from os.path import realpath
+
+import torch
+import torch.nn as nn
+from torch.nn.parameter import Parameter
+from torch.nn import init
+
+from brevitas.core.quant import QuantType
+from brevitas.core.scaling import ScalingImplType
+from brevitas.nn import QuantHardTanh, QuantReLU
+
+from pyverilator import PyVerilator
+
+from logicnets.quant import QuantBrevitasActivation
+from logicnets.nn import SparseLinearNeq, ScalarBiasScale, RandomFixedSparsityMask2D, DenseMask2D
+from logicnets.init import random_restrict_fanin
+
+class MnistNeqModel(nn.Module):
+    """Stack of SparseLinearNeq layers for MNIST; forward() runs in PyTorch or through a Verilator simulation."""
+    def __init__(self, model_config):
+        super(MnistNeqModel, self).__init__()
+        self.model_config = model_config
+        self.num_neurons = [model_config["input_length"]] + model_config["hidden_layers"] + [model_config["output_length"]]
+        layer_list = []
+        for i in range(1, len(self.num_neurons)):
+            in_features = self.num_neurons[i-1]
+            out_features = self.num_neurons[i]
+            bn = nn.BatchNorm1d(out_features)
+            nn.init.constant_(bn.weight.data, 1)
+            nn.init.constant_(bn.bias.data, 0)
+            if i == 1:
+                do_in = nn.Dropout(p=model_config["input_dropout"])
+                bn_in = nn.BatchNorm1d(in_features)
+                nn.init.constant_(bn_in.weight.data, 1)
+                nn.init.constant_(bn_in.bias.data, 0)
+                input_bias = ScalarBiasScale(scale=False, bias_init=-0.25)
+                input_quant = QuantBrevitasActivation(QuantHardTanh(model_config["input_bitwidth"], max_val=1., narrow_range=False, quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER), pre_transforms=[do_in, bn_in, input_bias])
+                output_quant = QuantBrevitasActivation(QuantReLU(bit_width=model_config["hidden_bitwidth"], max_val=1.61, quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER), pre_transforms=[bn])
+                mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["input_fanin"])
+                layer = SparseLinearNeq(in_features, out_features, input_quant=input_quant, output_quant=output_quant, sparse_linear_kws={'mask': mask})
+            elif i == len(self.num_neurons)-1:
+                output_bias_scale = ScalarBiasScale(bias_init=0.33)
+                output_quant = QuantBrevitasActivation(QuantHardTanh(bit_width=model_config["output_bitwidth"], max_val=1.33, narrow_range=False, quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER), pre_transforms=[bn], post_transforms=[output_bias_scale])
+                mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["output_fanin"])
+                layer = SparseLinearNeq(in_features, out_features, input_quant=layer_list[-1].output_quant, output_quant=output_quant, sparse_linear_kws={'mask': mask}, apply_input_quant=False)
+            else:
+                output_quant = QuantBrevitasActivation(QuantReLU(bit_width=model_config["hidden_bitwidth"], max_val=1.61, quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER), pre_transforms=[bn])
+                mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["hidden_fanin"])
+                layer = SparseLinearNeq(in_features, out_features, input_quant=layer_list[-1].output_quant, output_quant=output_quant, sparse_linear_kws={'mask': mask}, apply_input_quant=False)
+            layer_list.append(layer)
+        self.module_list = nn.ModuleList(layer_list)
+        self.is_verilog_inference = False
+        self.latency = 1
+        self.verilog_dir = None
+        self.top_module_filename = None
+        self.dut = None
+        self.verify = True
+        self.logfile = None
+
+    def verilog_inference(self, verilog_dir, top_module_filename, logfile: bool = False, add_registers: bool = False, verify: bool = True):
+        """Build the Verilator model and route forward() through it; logfile is a path to append I/O pairs to (None/False disables)."""
+        self.verilog_dir = realpath(verilog_dir)
+        self.top_module_filename = top_module_filename
+        self.dut = PyVerilator.build(f"{self.verilog_dir}/{self.top_module_filename}", verilog_path=[self.verilog_dir], build_dir=f"{self.verilog_dir}/verilator", command_args=("--x-assign","0",))
+        self.is_verilog_inference = True
+        self.verify = verify
+        self.logfile = logfile
+        if add_registers:
+            self.latency = len(self.num_neurons)
+
+    def pytorch_inference(self):
+        """Route forward() back through the PyTorch layers."""
+        self.is_verilog_inference = False
+
+    def verilog_forward(self, x):
+        """Quantize x, drive each sample through the simulated design and return the decoded integer outputs."""
+        input_quant = self.module_list[0].input_quant
+        output_quant = self.module_list[-1].output_quant
+        _, input_bitwidth = input_quant.get_scale_factor_bits()
+        _, output_bitwidth = output_quant.get_scale_factor_bits()
+        input_bitwidth, output_bitwidth = int(input_bitwidth), int(output_bitwidth)
+        total_input_bits = self.module_list[0].in_features*input_bitwidth
+        total_output_bits = self.module_list[-1].out_features*output_bitwidth
+        num_layers = len(self.module_list)
+        input_quant.bin_output()
+        self.module_list[0].apply_input_quant = False
+        y = torch.zeros(x.shape[0], self.module_list[-1].out_features)
+        x = input_quant(x)
+        self.dut.io.rst = 0
+        self.dut.io.clk = 0
+        for i in range(x.shape[0]):
+            xv_i = [input_quant.get_bin_str(z) for z in x[i,:]]
+            xvc_i = "".join(xv_i[::-1])
+            self.dut["M0"] = int(xvc_i, 2)
+            for j in range(self.latency + 1):
+                res = self.dut[f"M{num_layers}"]
+                self.dut.io.clk = 1
+                self.dut.io.clk = 0
+            result = f"{res:0{int(total_output_bits)}b}"
+            if self.verify:
+                y_i = self.pytorch_forward(x[i:i+1,:])[0]
+                ys_i = list(map(lambda z: output_quant.get_bin_str(z), y_i))
+                ysc_i = "".join(ys_i[::-1])
+                expected = f"{int(ysc_i,2):0{int(total_output_bits)}b}"
+                assert(expected == result)
+            res_split = [result[i:i+output_bitwidth] for i in range(0, len(result), output_bitwidth)][::-1]
+            yv_i = torch.Tensor(list(map(lambda z: int(z, 2), res_split)))
+            y[i,:] = yv_i
+            # Dump the I/O pairs
+            # Log the simulated output: it equals the expected value when verify is on, and exists when it is off.
+            if self.logfile:
+                with open(self.logfile, "a") as f:
+                    f.write(f"{int(xvc_i,2):0{int(total_input_bits)}b}{result}\n")
+        return y
+
+    def pytorch_forward(self, x):
+        """Apply every layer of module_list to x in order."""
+        for l in self.module_list:
+            x = l(x)
+        return x
+
+    def forward(self, x):
+        """Dispatch to the Verilog simulation or the PyTorch layers, depending on the selected mode."""
+        if self.is_verilog_inference:
+            return self.verilog_forward(x)
+        return self.pytorch_forward(x)
+
+class MnistLutModel(MnistNeqModel):
+    pass
+
+class MnistVerilogModel(MnistNeqModel):
+    pass
+
diff --git a/examples/mnist/neq2lut.py b/examples/mnist/neq2lut.py
new file mode 100644
index 000000000..533ae6e4b
--- /dev/null
+++ b/examples/mnist/neq2lut.py
@@ -0,0 +1,187 @@
+# Copyright (C) 2021 Xilinx, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from argparse import ArgumentParser
+from functools import partial
+
+import torch
+from torch.utils.data import DataLoader
+from torchvision.datasets import MNIST
+from torchvision import transforms
+
+from logicnets.nn import generate_truth_tables, \
+                         lut_inference, \
+                         module_list_to_verilog_module, \
+                         load_histograms
+from logicnets.synthesis import synthesize_and_get_resource_counts
+from logicnets.util import proc_postsynth_file
+
+from models import MnistNeqModel, MnistLutModel, MnistVerilogModel
+from train import configs, model_config, test
+
+other_options = {
+    "cuda": None,
+    "log_dir": None,
+    "checkpoint": None,
+    "generate_bench": False,
+    "add_registers": False,
+    "histograms": None,
+    "freq_thresh": None,
+    "simulate_pre_synthesis_verilog": False,
+    "simulate_post_synthesis_verilog": False,
+}
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Synthesize convert a PyTorch trained model into verilog")
+    parser.add_argument('--arch',
type=str, choices=configs.keys(), default="mnist-s",
+        help="Specific the neural network model to use (default: %(default)s)")
+    parser.add_argument('--batch-size', type=int, default=None, metavar='N',
+        help="Batch size for evaluation (default: %(default)s)")
+    parser.add_argument('--input-bitwidth', type=int, default=None,
+        help="Bitwidth to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-bitwidth', type=int, default=None,
+        help="Bitwidth to use for activations in hidden layers (default: %(default)s)")
+    parser.add_argument('--output-bitwidth', type=int, default=None,
+        help="Bitwidth to use at the output (default: %(default)s)")
+    parser.add_argument('--input-fanin', type=int, default=None,
+        help="Fanin to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-fanin', type=int, default=None,
+        help="Fanin to use for the hidden layers (default: %(default)s)")
+    parser.add_argument('--output-fanin', type=int, default=None,
+        help="Fanin to use at the output (default: %(default)s)")
+    parser.add_argument('--hidden-layers', nargs='+', type=int, default=None,
+        help="A list of hidden layer neuron sizes (default: %(default)s)")
+    parser.add_argument('--input-dropout', type=float, default=None,
+        help="The amount of dropout to apply at the model input (default: %(default)s)")
+    parser.add_argument('--clock-period', type=float, default=1.0,
+        help="Target clock period (in ns) to use during Vivado synthesis (default: %(default)s)")
+    parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'],
+        help="Dataset to use for evaluation (default: %(default)s)")
+    parser.add_argument('--log-dir', type=str, default='./log',
+        help="A location to store the log output of the training run and the output model (default: %(default)s)")
+    parser.add_argument('--checkpoint', type=str, required=True,
+        help="The checkpoint file which contains the model weights")
+    parser.add_argument('--histograms', type=str, default=None,
+        help="The checkpoint histograms of LUT usage (default: %(default)s)")
+    parser.add_argument('--freq-thresh', type=int, default=None,
+        help="Threshold to use to include this truth table into the model (default: %(default)s)")
+    parser.add_argument('--generate-bench', action='store_true', default=False,
+        help="Generate the truth table in BENCH format as well as verilog (default: %(default)s)")
+    parser.add_argument('--dump-io', action='store_true', default=False,
+        help="Dump I/O to the verilog LUT to a text file in the log directory (default: %(default)s)")
+    parser.add_argument('--add-registers', action='store_true', default=False,
+        help="Add registers between each layer in generated verilog (default: %(default)s)")
+    parser.add_argument('--simulate-pre-synthesis-verilog', action='store_true', default=False,
+        help="Simulate the verilog generated by LogicNets (default: %(default)s)")
+    parser.add_argument('--simulate-post-synthesis-verilog', action='store_true', default=False,
+        help="Simulate the post-synthesis verilog produced by vivado (default: %(default)s)")
+    args = parser.parse_args()
+    defaults = configs[args.arch]
+    options = vars(args)
+    del options['arch']
+    config = {}
+    for k in options.keys():
+        config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified.
+
+    # exist_ok avoids the race between an existence check and the creation.
+    os.makedirs(config['log_dir'], exist_ok=True)
+
+    # Split up configuration options to be more understandable
+    model_cfg = {}
+    for k in model_config.keys():
+        model_cfg[k] = config[k]
+    options_cfg = {}
+    for k in other_options.keys():
+        if k == 'cuda':
+            continue
+        options_cfg[k] = config[k]
+
+    trans = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize((0.1307,), (0.3081,)),
+            transforms.Lambda(partial(torch.reshape, shape=(-1,)))
+        ])
+
+    # Fetch the dataset split selected with --dataset-split
+    dataset = {}
+    dataset[args.dataset_split] = MNIST('./data', train=args.dataset_split == "train", download=True, transform=trans)
+    test_loader = DataLoader(dataset[args.dataset_split], batch_size=config['batch_size'], shuffle=False)
+
+    # Instantiate the PyTorch model
+    x, _ = dataset[args.dataset_split][0]
+    model_cfg['input_length'] = len(x)
+    model_cfg['output_length'] = 10
+    model = MnistNeqModel(model_cfg)
+
+    # Load the model weights
+    checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu')
+    model.load_state_dict(checkpoint['model_dict'])
+
+    # Test the PyTorch model
+    print("Running inference on baseline model...")
+    model.eval()
+    baseline_accuracy = test(model, test_loader, cuda=False)
+    print("Baseline accuracy: %f" % (baseline_accuracy))
+
+    # Instantiate LUT-based model
+    lut_model = MnistLutModel(model_cfg)
+    lut_model.load_state_dict(checkpoint['model_dict'])
+
+    # Generate the truth tables in the LUT module
+    print("Converting to NEQs to LUTs...")
+    generate_truth_tables(lut_model, verbose=True)
+
+    # Test the LUT-based model
+    print("Running inference on LUT-based model...")
+    lut_inference(lut_model)
+    lut_model.eval()
+    lut_accuracy = test(lut_model, test_loader, cuda=False)
+    print("LUT-Based Model accuracy: %f" % (lut_accuracy))
+    modelSave = {   'model_dict': lut_model.state_dict(),
+                    'test_accuracy': lut_accuracy}
+
+    torch.save(modelSave, options_cfg["log_dir"] + "/lut_based_model.pth")
+    if options_cfg["histograms"] is not None:
+        luts = torch.load(options_cfg["histograms"])
+        load_histograms(lut_model, luts)
+
+    print("Generating verilog in %s..." % (options_cfg["log_dir"]))
+    module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=options_cfg["generate_bench"], add_registers=options_cfg["add_registers"], freq_thresh=options_cfg["freq_thresh"])
+    print("Top level entity stored at: %s/logicnet.v ..." % (options_cfg["log_dir"]))
+
+    if args.dump_io:
+        # Join with a separator so the dump lands inside the log directory, not next to it.
+        io_filename = os.path.join(options_cfg["log_dir"], f"io_{args.dataset_split}.txt")
+        with open(io_filename, 'w') as f:
+            pass # Create an empty file.
+        print(f"Dumping verilog I/O to {io_filename}...")
+    else:
+        io_filename = None
+    if args.simulate_pre_synthesis_verilog:
+        print("Running inference simulation of Verilog-based model...")
+        lut_model.verilog_inference(options_cfg["log_dir"], "logicnet.v", logfile=io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0)
+        verilog_accuracy = test(lut_model, test_loader, cuda=False)
+        print("Verilog-Based Model accuracy: %f" % (verilog_accuracy))
+
+    print("Running out-of-context synthesis")
+    ret = synthesize_and_get_resource_counts(options_cfg["log_dir"], "logicnet", fpga_part="xcu280-fsvh2892-2L-e", clk_period_ns=args.clock_period, post_synthesis = 1)
+
+    if args.simulate_post_synthesis_verilog:
+        print("Running post-synthesis inference simulation of Verilog-based model...")
+        proc_postsynth_file(options_cfg["log_dir"])
+        lut_model.verilog_inference(options_cfg["log_dir"]+"/post_synth", "logicnet_post_synth.v", io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0)
+        post_synth_accuracy = test(lut_model, test_loader, cuda=False)
+        print("Post-synthesis Verilog-Based Model accuracy: %f" % (post_synth_accuracy))
+
diff --git a/examples/mnist/neq2lut_abc.py
b/examples/mnist/neq2lut_abc.py
new file mode 100644
index 000000000..9ab762f8c
--- /dev/null
+++ b/examples/mnist/neq2lut_abc.py
@@ -0,0 +1,172 @@
+# Copyright (C) 2021 Xilinx, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from argparse import ArgumentParser
+from functools import partial
+
+import torch
+from torch.utils.data import DataLoader
+from torchvision.datasets import MNIST
+from torchvision import transforms
+
+from logicnets.nn import generate_truth_tables, \
+                         lut_inference, \
+                         module_list_to_verilog_module, \
+                         load_histograms
+from logicnets.synthesis import synthesize_and_get_resource_counts_with_abc
+
+from models import MnistNeqModel, MnistLutModel
+from train import configs, model_config, test
+from dataset_dump import dump_io
+
+other_options = {
+    "cuda": None,
+    "log_dir": None,
+    "checkpoint": None,
+    "histograms": None,
+    "freq_thresh": None,
+}
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Synthesize convert a PyTorch trained model into verilog using ABC")
+    parser.add_argument('--arch', type=str, choices=configs.keys(), default="mnist-s",
+        help="Specific the neural network model to use (default: %(default)s)")
+    parser.add_argument('--batch-size', type=int, default=None, metavar='N',
+        help="Batch size for evaluation (default: %(default)s)")
+    parser.add_argument('--input-bitwidth', type=int, default=None,
+        help="Bitwidth to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-bitwidth', type=int, default=None,
+        help="Bitwidth to use for activations in hidden layers (default: %(default)s)")
+    parser.add_argument('--output-bitwidth', type=int, default=None,
+        help="Bitwidth to use at the output (default: %(default)s)")
+    parser.add_argument('--input-fanin', type=int, default=None,
+        help="Fanin to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-fanin', type=int, default=None,
+        help="Fanin to use for the hidden layers (default: %(default)s)")
+    parser.add_argument('--output-fanin', type=int, default=None,
+        help="Fanin to use at the output (default: %(default)s)")
+    parser.add_argument('--hidden-layers', nargs='+', type=int, default=None,
+        help="A list of hidden layer neuron sizes (default: %(default)s)")
+    parser.add_argument('--input-dropout', type=float, default=None,
+        help="The amount of dropout to apply at the model input (default: %(default)s)")
+    parser.add_argument('--clock-period', type=float, default=1.0,
+        help="Target clock frequency to use during Vivado synthesis (default: %(default)s)")
+    parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'],
+        help="Dataset to use for evaluation (default: %(default)s)")
+    parser.add_argument('--log-dir', type=str, default='./log',
+        help="A location to store the log output of the training run and the output model (default: %(default)s)")
+    parser.add_argument('--checkpoint', type=str, required=True,
+        help="The checkpoint file which contains the model weights")
+    parser.add_argument('--histograms', type=str, default=None,
+        help="The checkpoint histograms of LUT usage (default: %(default)s)")
+    parser.add_argument('--freq-thresh', type=int, default=None,
+        help="Threshold to use to include this truth table into the model (default: %(default)s)")
+    parser.add_argument('--num-registers', type=int, default=0,
+        help="The number of registers to add to the generated verilog (default: %(default)s)")
+    args = parser.parse_args()
+    defaults = configs[args.arch]
+    options = vars(args)
+    del options['arch']
+    config = {}
+    for k in options.keys():
+        config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified.
+
+    # exist_ok avoids the race between an existence check and the creation.
+    os.makedirs(config['log_dir'], exist_ok=True)
+
+    # Split up configuration options to be more understandable
+    model_cfg = {}
+    for k in model_config.keys():
+        model_cfg[k] = config[k]
+    options_cfg = {}
+    for k in other_options.keys():
+        if k == 'cuda':
+            continue
+        options_cfg[k] = config[k]
+
+    trans = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize((0.1307,), (0.3081,)),
+            transforms.Lambda(partial(torch.reshape, shape=(-1,)))
+        ])
+
+    # Fetch the train and test sets
+    dataset = {}
+    dataset["train"] = MNIST('./data', train=True, download=True, transform=trans)
+    dataset["test"] = MNIST('./data', train=False, download=True, transform=trans)
+    train_loader = DataLoader(dataset["train"], batch_size=config['batch_size'], shuffle=False)
+    test_loader = DataLoader(dataset["test"], batch_size=config['batch_size'], shuffle=False)
+
+
+    # Instantiate the PyTorch model
+    x, _ = dataset[args.dataset_split][0]
+    model_cfg['input_length'] = len(x)
+    model_cfg['output_length'] = 10
+    model = MnistNeqModel(model_cfg)
+
+    # Load the model weights
+    checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu')
+    model.load_state_dict(checkpoint['model_dict'])
+
+    # Test the PyTorch model
+    print("Running inference on baseline model...")
+    model.eval()
+    baseline_accuracy = test(model, test_loader, cuda=False)
+    print("Baseline accuracy: %f" % (baseline_accuracy))
+
+    # Run preprocessing on training set.
+    # NOTE(review): the ABC step below is given these four file names; presumably they must already exist in the log dir (dataset_dump.py writes them) - confirm.
+    #train_input_file = config['log_dir'] + "/train_input.txt"
+    #train_output_file = config['log_dir'] + "/train_output.txt"
+    #test_input_file = config['log_dir'] + "/test_input.txt"
+    #test_output_file = config['log_dir'] + "/test_output.txt"
+    #print(f"Dumping train I/O to {train_input_file} and {train_output_file}")
+    #dump_io(model, train_loader, train_input_file, train_output_file)
+    #print(f"Dumping test I/O to {test_input_file} and {test_output_file}")
+    #dump_io(model, test_loader, test_input_file, test_output_file)
+
+    # Instantiate LUT-based model
+    lut_model = MnistLutModel(model_cfg)
+    lut_model.load_state_dict(checkpoint['model_dict'])
+
+    # Generate the truth tables in the LUT module
+    print("Converting to NEQs to LUTs...")
+    generate_truth_tables(lut_model, verbose=True)
+
+    # Test the LUT-based model
+    print("Running inference on LUT-based model...")
+    lut_inference(lut_model)
+    lut_model.eval()
+    lut_accuracy = test(lut_model, test_loader, cuda=False)
+    print("LUT-Based Model accuracy: %f" % (lut_accuracy))
+    modelSave = {   'model_dict': lut_model.state_dict(),
+                    'test_accuracy': lut_accuracy}
+
+    torch.save(modelSave, options_cfg["log_dir"] + "/lut_based_model.pth")
+    if options_cfg["histograms"] is not None:
+        luts = torch.load(options_cfg["histograms"])
+        load_histograms(lut_model, luts)
+
+    print("Generating verilog in %s..." % (options_cfg["log_dir"]))
+    module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=True, add_registers=False)
+    print("Top level entity stored at: %s/logicnet.v ..." % (options_cfg["log_dir"]))
+
+    print("Running synthesis and verilog technology-mapped verilog in ABC")
+    train_accuracy, test_accuracy, nodes, average_care_set_size = synthesize_and_get_resource_counts_with_abc(options_cfg["log_dir"], lut_model.module_list, pipeline_stages=args.num_registers, freq_thresh=args.freq_thresh, train_input_txt="train_input.txt", train_output_txt="train_output.txt", test_input_txt="test_input.txt", test_output_txt="test_output.txt", bdd_opt_cmd="&ttopt", verbose=False)
+    print(f"Training set accuracy(%): {train_accuracy}")
+    print(f"Test set accuracy(%): {test_accuracy}")
+    print(f"LUT6(#): {nodes}")
+    print(f"Average care set sizes(%): {average_care_set_size}")
+
diff --git a/examples/mnist/requirements.txt b/examples/mnist/requirements.txt
new file mode 100644
index 000000000..ac3ab55bf
--- /dev/null
+++ b/examples/mnist/requirements.txt
@@ -0,0 +1,17 @@
+# Copyright (C) 2021 Xilinx, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+numpy
+tensorboard
+torchvision
diff --git a/examples/mnist/simulate_verilog.py b/examples/mnist/simulate_verilog.py
new file mode 100644
index 000000000..bfcbb8056
--- /dev/null
+++ b/examples/mnist/simulate_verilog.py
@@ -0,0 +1,115 @@
+# Copyright (C) 2021 Xilinx, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from argparse import ArgumentParser +from functools import partial + +import torch +from torch.utils.data import DataLoader +from torchvision.datasets import MNIST +from torchvision import transforms + +from models import MnistNeqModel +from train import configs, model_config, test + +other_options = { + "checkpoint": None, + "input_verilog": None, + "num_registers": None, +} + +if __name__ == "__main__": + parser = ArgumentParser(description="Synthesize convert a PyTorch trained model into verilog") + parser.add_argument('--arch', type=str, choices=configs.keys(), default="mnist-s", + help="Specific the neural network model to use (default: %(default)s)") + parser.add_argument('--batch-size', type=int, default=None, metavar='N', + help="Batch size for evaluation (default: %(default)s)") + parser.add_argument('--input-bitwidth', type=int, default=None, + help="Bitwidth to use at the input (default: %(default)s)") + parser.add_argument('--hidden-bitwidth', type=int, default=None, + help="Bitwidth to use for activations in hidden layers (default: %(default)s)") + parser.add_argument('--output-bitwidth', type=int, default=None, + help="Bitwidth to use at the output (default: %(default)s)") + parser.add_argument('--input-fanin', type=int, default=None, + help="Fanin to use at the input (default: %(default)s)") + parser.add_argument('--hidden-fanin', type=int, default=None, + help="Fanin to use for the hidden layers (default: %(default)s)") + parser.add_argument('--output-fanin', type=int, default=None, + help="Fanin to use at the output (default: 
%(default)s)") + parser.add_argument('--hidden-layers', nargs='+', type=int, default=None, + help="A list of hidden layer neuron sizes (default: %(default)s)") + parser.add_argument('--input-dropout', type=float, default=None, + help="The amount of dropout to apply at the model input (default: %(default)s)") + parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'], + help="Dataset to use for evaluation (default: %(default)s)") + parser.add_argument('--checkpoint', type=str, required=True, + help="The checkpoint file which contains the model weights") + parser.add_argument('--input-verilog', type=str, required=True, + help="The input verilog file to simulate") + parser.add_argument('--num-registers', type=int, default=0, + help="The number of pipeline registers in the verilog (default: %(default)s)") + args = parser.parse_args() + defaults = configs[args.arch] + options = vars(args) + del options['arch'] + config = {} + for k in options.keys(): + config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified. 
+ + # Split up configuration options to be more understandable + model_cfg = {} + for k in model_config.keys(): + model_cfg[k] = config[k] + options_cfg = {} + for k in other_options.keys(): + if k == 'cuda': + continue + options_cfg[k] = config[k] + + trans = transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)), + transforms.Lambda(partial(torch.reshape, shape=(-1,))) + ]) + + # Fetch the test set + dataset = {} + dataset[args.dataset_split] = MNIST('./data', train=args.dataset_split == "train", download=True, transform=trans) + test_loader = DataLoader(dataset[args.dataset_split], batch_size=config['batch_size'], shuffle=False) + + # Instantiate the PyTorch model + x, y = dataset[args.dataset_split][0] + model_cfg['input_length'] = len(x) + model_cfg['output_length'] = 10 + model = MnistNeqModel(model_cfg) + + # Load the model weights + checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu') + model.load_state_dict(checkpoint['model_dict']) + + # Test the PyTorch model + print("Running inference on baseline model...") + model.eval() + baseline_accuracy = test(model, test_loader, cuda=False) + print("Baseline accuracy: %f" % (baseline_accuracy)) + + verilog_dir = os.path.dirname(options_cfg["input_verilog"]) + filename = os.path.split(options_cfg["input_verilog"])[-1] + print(f"Running inference simulation of Verilog-based model ({filename})") + model.verilog_inference(verilog_dir, filename, logfile=None, add_registers=options_cfg["num_registers"] != 0, verify=False) + model.latency = options_cfg["num_registers"] + verilog_accuracy = test(model, test_loader, cuda=False) + print("Verilog-Based Model accuracy: %f" % (verilog_accuracy)) + diff --git a/examples/mnist/train.py b/examples/mnist/train.py new file mode 100644 index 000000000..e3a5f1da6 --- /dev/null +++ b/examples/mnist/train.py @@ -0,0 +1,491 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from argparse import ArgumentParser +from functools import reduce, partial +import random + +import numpy as np + +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader +from torch.utils.tensorboard import SummaryWriter +from torchvision import transforms +from torchvision.datasets import MNIST + +from models import MnistNeqModel + +# TODO: Replace default configs with YAML files. +configs = { + "mnist-xxs": { + "hidden_layers": [1024, 1024, 1024, 128], + "input_bitwidth": 1, + "hidden_bitwidth": 1, + "output_bitwidth": 4, + "input_fanin": 8, + "hidden_fanin": 8, + "output_fanin": 8, + "input_dropout": 0.01, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 0, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-xs": { + "hidden_layers": [1024, 1024, 128], + "input_bitwidth": 1, + "hidden_bitwidth": 1, + "output_bitwidth": 4, + "input_fanin": 8, + "hidden_fanin": 8, + "output_fanin": 8, + "input_dropout": 0.01, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 0, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-s": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 1, + "hidden_bitwidth": 1, + "output_bitwidth": 4, + "input_fanin": 8, + "hidden_fanin": 8, + "output_fanin": 8, + "input_dropout": 0.01, + "weight_decay": 1e-3, + 
"batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 5, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-s-1.1": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 1, + "hidden_bitwidth": 1, + "output_bitwidth": 4, + "input_fanin": 6, + "hidden_fanin": 6, + "output_fanin": 6, + "input_dropout": 0.1, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 18, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-m": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 1, + "hidden_bitwidth": 1, + "output_bitwidth": 4, + "input_fanin": 10, + "hidden_fanin": 10, + "output_fanin": 10, + "input_dropout": 0.01, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 2, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-m-1.1": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 2, + "hidden_bitwidth": 2, + "output_bitwidth": 4, + "input_fanin": 5, + "hidden_fanin": 5, + "output_fanin": 5, + "input_dropout": 0.1, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 20, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-m-1.2": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 3, + "hidden_bitwidth": 3, + "output_bitwidth": 4, + "input_fanin": 3, + "hidden_fanin": 3, + "output_fanin": 3, + "input_dropout": 0.01, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 0, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-m-1.3": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 1, + "hidden_bitwidth": 2, + "output_bitwidth": 4, + "input_fanin": 10, + "hidden_fanin": 5, + "output_fanin": 5, + 
"input_dropout": 0.1, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 6, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-l": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 1, + "hidden_bitwidth": 1, + "output_bitwidth": 4, + "input_fanin": 12, + "hidden_fanin": 12, + "output_fanin": 12, + "input_dropout": 0.01, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 0, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-l-1.1": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 2, + "hidden_bitwidth": 2, + "output_bitwidth": 4, + "input_fanin": 6, + "hidden_fanin": 6, + "output_fanin": 6, + "input_dropout": 0.1, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 12, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-l-1.2": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 1, + "hidden_bitwidth": 2, + "output_bitwidth": 4, + "input_fanin": 12, + "hidden_fanin": 6, + "output_fanin": 6, + "input_dropout": 0.1, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 6, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, + "mnist-l-1.3": { + "hidden_layers": [1024, 1024, 1024, 1024, 1024, 128], + "input_bitwidth": 1, + "hidden_bitwidth": 3, + "output_bitwidth": 4, + "input_fanin": 12, + "hidden_fanin": 4, + "output_fanin": 6, + "input_dropout": 0.1, + "weight_decay": 1e-3, + "batch_size": 1024, + "epochs": 1000, + "learning_rate": 1e-3, + "seed": 6, + "checkpoint": None, + "histograms": None, + "freq_thresh": None, + }, +} + +# A dictionary, so we can set some defaults if necessary +model_config = { + "hidden_layers": None, + "input_bitwidth": None, + "hidden_bitwidth": None, + 
"output_bitwidth": None, + "input_fanin": None, + "hidden_fanin": None, + "output_fanin": None, + "input_dropout": None, +} + +training_config = { + "weight_decay": None, + "batch_size": None, + "epochs": None, + "learning_rate": None, + "seed": None, +} + +other_options = { + "cuda": None, + "log_dir": None, + "checkpoint": None, +} + +def train(model, datasets, train_cfg, options): + # Create data loaders for training and inference: + train_loader = DataLoader(datasets["train"], batch_size=train_cfg['batch_size'], shuffle=True, num_workers=2) + val_loader = DataLoader(datasets["valid"], batch_size=train_cfg['batch_size'], shuffle=False, num_workers=2) + test_loader = DataLoader(datasets["test"], batch_size=train_cfg['batch_size'], shuffle=False, num_workers=2) + + # Configure optimizer + weight_decay = train_cfg["weight_decay"] + decay_exclusions = ["bn", "bias", "learned_value"] # Make a list of parameters name fragments which will ignore weight decay TODO: make this list part of the train_cfg + decay_params = [] + no_decay_params = [] + for pname, params in model.named_parameters(): + if params.requires_grad: + if reduce(lambda a,b: a or b, map(lambda x: x in pname, decay_exclusions)): # check if the current label should be excluded from weight decay + #print("Disabling weight decay for %s" % (pname)) + no_decay_params.append(params) + else: + #print("Enabling weight decay for %s" % (pname)) + decay_params.append(params) + #else: + #print("Ignoring %s" % (pname)) + params = [{'params': decay_params, 'weight_decay': weight_decay}, + {'params': no_decay_params, 'weight_decay': 0.0}] + optimizer = optim.AdamW(params, lr=train_cfg['learning_rate'], betas=(0.5, 0.999), weight_decay=weight_decay) + + # Configure scheduler + steps = len(train_loader) + scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=steps*100, T_mult=1) + + # Configure criterion + criterion = nn.CrossEntropyLoss() + + # Push the model to the GPU, if necessary + if 
options["cuda"]: + model.cuda() + + # Setup tensorboard + writer = SummaryWriter(options["log_dir"]) + + # Main training loop + maxAcc = 0.0 + num_epochs = train_cfg["epochs"] + for epoch in range(0, num_epochs): + # Train for this epoch + model.train() + accLoss = 0.0 + correct = 0 + for batch_idx, (data, target) in enumerate(train_loader): + if options["cuda"]: + data, target = data.cuda(), target.cuda() + optimizer.zero_grad() + output = model(data) + loss = criterion(output, target) + pred = output.detach().max(1, keepdim=True)[1] + target_label = target.detach().unsqueeze(1) + curCorrect = pred.eq(target_label).long().sum() + curAcc = 100.0*curCorrect / len(data) + correct += curCorrect + accLoss += loss.detach()*len(data) + loss.backward() + optimizer.step() + scheduler.step() + + # Log stats to tensorboard + #writer.add_scalar('train_loss', loss.detach().cpu().numpy(), epoch*steps + batch_idx) + #writer.add_scalar('train_accuracy', curAcc.detach().cpu().numpy(), epoch*steps + batch_idx) + #g = optimizer.param_groups[0] + #writer.add_scalar('LR', g['lr'], epoch*steps + batch_idx) + + accLoss /= len(train_loader.dataset) + accuracy = 100.0*correct / len(train_loader.dataset) + print(f"Epoch: {epoch}/{num_epochs}\tTrain Acc (%): {accuracy.detach().cpu().numpy():.2f}\tTrain Loss: {accLoss.detach().cpu().numpy():.3e}") + #for g in optimizer.param_groups: + # print("LR: {:.6f} ".format(g['lr'])) + # print("LR: {:.6f} ".format(g['weight_decay'])) + writer.add_scalar('avg_train_loss', accLoss.detach().cpu().numpy(), (epoch+1)*steps) + writer.add_scalar('avg_train_accuracy', accuracy.detach().cpu().numpy(), (epoch+1)*steps) + val_accuracy = test(model, val_loader, options["cuda"]) + test_accuracy = test(model, test_loader, options["cuda"]) + modelSave = { 'model_dict': model.state_dict(), + 'optim_dict': optimizer.state_dict(), + 'val_accuracy': val_accuracy, + 'test_accuracy': test_accuracy, + 'epoch': epoch} + torch.save(modelSave, options["log_dir"] + 
"/checkpoint.pth") + if(maxAcc= best: + break + else: + if verbose: + print(best) + best = nodes + shutil.copy(tmp_file_path, output_file_path) + os.remove(tmp_file_path) + return best + +def tech_map_circuit(circuit_file, output_blif, input_bitwidth, output_bitwidth, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"&r {circuit_file}; &lnetmap -I {input_bitwidth} -O {output_bitwidth}; write {output_blif}"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + if verbose: + print(out) + print(err) + return out, err + +def pipeline_tech_mapped_circuit(circuit_file, output_verilog, num_registers, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"read {circuit_file}; print_stats; pipe -L {num_registers}; print_stats; retime -M 4; print_stats; sweep; print_stats; write_verilog -fm {output_verilog}"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + lut_re = re.compile(_lut_re_str) + nodes = int(lut_re.search(str(out)).group().split(" ")[-1]) + if verbose: + print(nodes) + print(out) + print(err) + return nodes, out, err + +def tech_map_to_verilog(circuit_file, output_verilog, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"read {circuit_file}; print_stats; write_verilog -fm {output_verilog}"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + lut_re = re.compile(_lut_re_str) + nodes = int(lut_re.search(str(out)).group().split(" ")[-1]) + if verbose: + print(nodes) + print(out) + print(err) + return nodes, out, err + +def evaluate_accuracy(circuit_file, sim_output_file, reference_txt, 
output_bitwidth, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + if circuit_file.endswith(".aig"): + cmd = [f"{abc_path}/abc", '-c', f"&r {circuit_file}; &lneteval -O {output_bitwidth} {sim_output_file} {reference_txt}"] + elif circuit_file.endswith(".blif"): + cmd = [f"{abc_path}/abc", '-c', f"read {circuit_file}; strash; &get; &lneteval -O {output_bitwidth} {sim_output_file} {reference_txt}"] + else: + raise ValueError(f"Unsupported file type: {circuit_file}") + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + acc_re = re.compile(_acc_re_str) + accuracy = float(acc_re.search(str(out)).group().split(" ")[-1]) + if verbose: + print(accuracy) + print(out) + print(err) + return accuracy, out, err # TODO: accuracy %, time + +def generate_prepare_script_string(num_layers, path): + prepare_script_template = """\ +# This script prepares experiments in ABC by deriving intermediate simulation patterns + +# Assuming that verilog/BENCH for each layer of the network are in files "ver/layer{{0,1,2,..}}.v" +# and input/output patterns are the network are in files {{train,test}}_{{input,output}}.txt + + +# ==================================================================================== +# Read the layers from Verilog/BENCH files +{read_layers_string} + +# ==================================================================================== +# Convert input patterns into the internal binary representation +&lnetread {path}/train_input.txt {path}/train.sim +&lnetread {path}/test_input.txt {path}/test.sim + + +# ==================================================================================== +# Generate training simulation info for the inputs of each layer +{simulate_layers_string} + +# ==================================================================================== +# Combine all layers into one monolithic AIG for the whole network 
(layers.aig) +{gen_monolithic_aig_string} +""" + read_layer_template = "&lnetread {path}/ver/layer{i}.v; &ps; &w {path}/layer{i}.aig\n" + simulate_layer_template = "&r {path}/layer{i}.aig; &lnetsim {path}/train{it}.sim {path}/train{ip1}.sim\n" + gen_monolithic_aig_template = "putontop {layers_aig_string}; st; ps; write {path}/layers.aig\n" + read_layers_string = "" + simulate_layers_string = "" + layers_aig_string = "" + for i in range(num_layers): + read_layers_string += read_layer_template.format(i=i, path=path) + simulate_layers_string += simulate_layer_template.format(i=i, it="" if i == 0 else i, ip1=i+1, path=path) + layers_aig_string += "{path}/layer{i}.aig ".format(i=i, path=path) + gen_monolithic_aig_string = gen_monolithic_aig_template.format(layers_aig_string=layers_aig_string.strip(), path=path) + return prepare_script_template.format( path=path, + read_layers_string=read_layers_string, + simulate_layers_string=simulate_layers_string, + gen_monolithic_aig_string=gen_monolithic_aig_string) + + +def generate_opt_script_string(module_list, path, num_registers, rarity=0, opt_cmd="&lnetopt"): + opt_script_template = """\ +# Generating script with rarity = {rarity}. 
+ +# ---- rarity = {rarity} ------------------------------------------------------------------------------------------------------- +{optimise_with_rarity_string} + +{gen_monolithic_aig_string} + +{technology_map_layers_string} + +{gen_monolithic_blif_string} + +read {path}/blif/layers_opt.blif; ps; pipe -L {num_registers}; ps; retime -M 4; ps; sweep; ps; write_verilog -fm {path}/ver/layers_opt_p{num_registers}.v + +&r {path}/aig/layers_opt.aig; &lnetsim {path}/train.sim {path}/train.simo +&r {path}/aig/layers_opt.aig; &lneteval -O 2 {path}/train.simo {path}/train_output.txt + +&r {path}/aig/layers_opt.aig; &lnetsim {path}/test.sim {path}/test.simo +&r {path}/aig/layers_opt.aig; &lneteval -O 2 {path}/test.simo {path}/test_output.txt + +""" + optimise_with_rarity_template = "&r {path}/layer{i}.aig; &ps; {opt_cmd} -I {fanin_bits} -O {fanout_bits} -R {rarity} {path}/train{it}.sim; &w {path}/aig/layer{i}_opt.aig; &ps; time\n" + technology_map_layer_template = "&r {path}/aig/layer{i}_opt.aig; &lnetmap -I {fanin_bits} -O {fanout_bits}; write {path}/blif/layer{i}_opt.blif; write_verilog -fm {path}/ver/layer{i}_opt.v\n" + gen_monolithic_aig_template = "putontop {aig_layers_string}; st; ps; write {path}/aig/layers_opt.aig\n" + gen_monolithic_blif_template = "putontop {blif_layers_string}; sw; ps; write {path}/blif/layers_opt.blif\n" + num_layers = len(module_list) # TODO: fetch number of layers from the model + optimise_with_rarity_string = "" + technology_map_layers_string = "" + aig_layers_string = "" + blif_layers_string = "" + for i in range(num_layers): + # Read in fanin/fanout bits + # Add assertion that fanin/fanout bits for all neuron is that same + layer = module_list[i] + _, input_bitwidth = layer.input_quant.get_scale_factor_bits() + _, output_bitwidth = layer.output_quant.get_scale_factor_bits() + num_indices = len(layer.neuron_truth_tables[0]) + fanin_bits = input_bitwidth*num_indices + fanout_bits = output_bitwidth + + # Generate optimisation script. 
+ optimise_with_rarity_string += optimise_with_rarity_template.format(fanin_bits=fanin_bits, fanout_bits=fanout_bits, it="" if i == 0 else i, i=i, path=path, rarity=rarity, opt_cmd=opt_cmd) + technology_map_layers_string += technology_map_layer_template.format(fanin_bits=fanin_bits, fanout_bits=fanout_bits, i=i, path=path) + aig_layers_string += "{path}/aig/layer{i}_opt.aig ".format(i=i, path=path) + blif_layers_string += "{path}/blif/layer{i}_opt.blif ".format(i=i, path=path) + gen_monolithic_aig_string = gen_monolithic_aig_template.format(aig_layers_string=aig_layers_string.strip(), path=path) + gen_monolithic_blif_string = gen_monolithic_blif_template.format(blif_layers_string=blif_layers_string.strip(), path=path) + return opt_script_template.format( rarity=rarity, + num_registers=num_registers, + path=path, + optimise_with_rarity_string=optimise_with_rarity_string, + gen_monolithic_aig_string=gen_monolithic_aig_string, + technology_map_layers_string=technology_map_layers_string, + gen_monolithic_blif_string=gen_monolithic_blif_string) + + diff --git a/src/logicnets/nn.py b/src/logicnets/nn.py index dc4d53796..8a793993a 100644 --- a/src/logicnets/nn.py +++ b/src/logicnets/nn.py @@ -47,10 +47,40 @@ def generate_truth_tables(model: nn.Module, verbose: bool = False) -> None: model.training = training # TODO: Create a container module which performs this function. -def lut_inference(model: nn.Module) -> None: +def lut_inference(model: nn.Module, track_used_luts: bool = False) -> None: for name, module in model.named_modules(): if type(module) == SparseLinearNeq: - module.lut_inference() + module.lut_inference(track_used_luts=track_used_luts) + +# TODO: Create a container module which performs this function. 
+def save_luts(model: nn.Module, path: str) -> None: + lut_dict = {} + for name, module in model.named_modules(): + if type(module) == SparseLinearNeq: + luts = module.neuron_truth_tables + indices = list(map(lambda x: x[0], luts)) + tt_inputs = list(map(lambda x: x[1], luts)) + tt_input_bin_str = list(map(lambda x: list(map(lambda y: list(map(lambda z: module.input_quant.get_bin_str(z), y)), x)), tt_inputs)) + tt_float_outputs = list(map(lambda x: x[2], luts)) + tt_bin_outputs = list(map(lambda x: x[3], luts)) + tt_outputs_bin_str = list(map(lambda x: list(map(lambda y: module.output_quant.get_bin_str(y), x)), tt_bin_outputs)) + histogram = module.used_luts_histogram + lut_dict[name] = { + 'indices': indices, + 'input_state_space': tt_inputs, + 'input_state_space_bin_str': tt_input_bin_str, + 'output_state_space_float': tt_float_outputs, + 'output_state_space_bin': tt_bin_outputs, + 'output_state_space_bin_str': tt_outputs_bin_str, + 'histogram': histogram, + } + torch.save(lut_dict, path) + +# TODO: Create a container module which performs this function. +def load_histograms(model: nn.Module, lut_dict: dict) -> None: + for name, module in model.named_modules(): + if name in lut_dict.keys(): + module.used_luts_histogram = lut_dict[name]['histogram'] # TODO: Create a container module which performs this function. def neq_inference(model: nn.Module) -> None: @@ -60,7 +90,7 @@ def neq_inference(model: nn.Module) -> None: # TODO: Should this go in with the other verilog functions? 
# TODO: Support non-linear topologies -def module_list_to_verilog_module(module_list: nn.ModuleList, module_name: str, output_directory: str, add_registers: bool = True, generate_bench: bool = True): +def module_list_to_verilog_module(module_list: nn.ModuleList, module_name: str, output_directory: str, add_registers: bool = True, generate_bench: bool = True, freq_thresh = None): input_bitwidth = None output_bitwidth = None module_contents = "" @@ -68,7 +98,7 @@ def module_list_to_verilog_module(module_list: nn.ModuleList, module_name: str, m = module_list[i] if type(m) == SparseLinearNeq: module_prefix = f"layer{i}" - module_input_bits, module_output_bits = m.gen_layer_verilog(module_prefix, output_directory, generate_bench=generate_bench) + module_input_bits, module_output_bits = m.gen_layer_verilog(module_prefix, output_directory, freq_thresh=freq_thresh, generate_bench=generate_bench) if i == 0: input_bitwidth = module_input_bits if i == len(module_list)-1: @@ -115,6 +145,8 @@ def __init__(self, in_features: int, out_features: int, input_quant, output_quan self.neuron_truth_tables = None self.apply_input_quant = apply_input_quant self.apply_output_quant = apply_output_quant + self.track_used_luts = False + self.used_luts_histogram = None def lut_cost(self): """ @@ -144,7 +176,7 @@ def lut_cost(self): # TODO: Move the verilog string templates to elsewhere # TODO: Move this to another class # TODO: Update this code to support custom bitwidths per input/output - def gen_layer_verilog(self, module_prefix, directory, generate_bench: bool = True): + def gen_layer_verilog(self, module_prefix, directory, freq_thresh = None, generate_bench: bool = True): _, input_bitwidth = self.input_quant.get_scale_factor_bits() _, output_bitwidth = self.output_quant.get_scale_factor_bits() input_bitwidth, output_bitwidth = int(input_bitwidth), int(output_bitwidth) @@ -155,7 +187,7 @@ def gen_layer_verilog(self, module_prefix, directory, generate_bench: bool = Tru for index in 
range(self.out_features): module_name = f"{module_prefix}_N{index}" indices, _, _, _ = self.neuron_truth_tables[index] - neuron_verilog = self.gen_neuron_verilog(index, module_name) # Generate the contents of the neuron verilog + neuron_verilog = self.gen_neuron_verilog(index, module_name, freq_thresh=freq_thresh) # Generate the contents of the neuron verilog with open(f"{directory}/{module_name}.v", "w") as f: f.write(neuron_verilog) if generate_bench: @@ -175,7 +207,7 @@ def gen_layer_verilog(self, module_prefix, directory, generate_bench: bool = Tru # TODO: Move the verilog string templates to elsewhere # TODO: Move this to another class - def gen_neuron_verilog(self, index, module_name): + def gen_neuron_verilog(self, index, module_name, freq_thresh=None): indices, input_perm_matrix, float_output_states, bin_output_states = self.neuron_truth_tables[index] _, input_bitwidth = self.input_quant.get_scale_factor_bits() _, output_bitwidth = self.output_quant.get_scale_factor_bits() @@ -188,7 +220,11 @@ def gen_neuron_verilog(self, index, module_name): val = input_perm_matrix[i,idx] entry_str += self.input_quant.get_bin_str(val) res_str = self.output_quant.get_bin_str(bin_output_states[i]) - lut_string += f"\t\t\t{int(cat_input_bitwidth)}'b{entry_str}: M1r = {int(output_bitwidth)}'b{res_str};\n" + if (freq_thresh is None) or (self.used_luts_histogram[index][i] >= freq_thresh): + lut_string += f"\t\t\t{int(cat_input_bitwidth)}'b{entry_str}: M1r = {int(output_bitwidth)}'b{res_str};\n" + # Add a default "don't care" statement + default_string = int(output_bitwidth) * 'x' + lut_string += f"\t\t\tdefault: M1r = {int(output_bitwidth)}'b{default_string};\n" return generate_lut_verilog(module_name, int(cat_input_bitwidth), int(output_bitwidth), lut_string) # TODO: Move the string templates to bench.py @@ -212,8 +248,9 @@ def gen_neuron_bench(self, index, module_name): lut_string += generate_lut_input_string(int(cat_input_bitwidth)) return 
generate_lut_bench(int(cat_input_bitwidth), int(output_bitwidth), lut_string) - def lut_inference(self): + def lut_inference(self, track_used_luts=False): self.is_lut_inference = True + self.track_used_luts = track_used_luts self.input_quant.bin_output() self.output_quant.bin_output() @@ -223,7 +260,7 @@ def neq_inference(self): self.output_quant.float_output() # TODO: This function might be a useful utility outside of this class.. - def table_lookup(self, connected_input: Tensor, input_perm_matrix: Tensor, bin_output_states: Tensor) -> Tensor: + def table_lookup(self, connected_input: Tensor, input_perm_matrix: Tensor, bin_output_states: Tensor, neuron_lut_histogram=None) -> Tensor: fan_in_size = connected_input.shape[1] ci_bcast = connected_input.unsqueeze(2) # Reshape to B x Fan-in x 1 pm_bcast = input_perm_matrix.t().unsqueeze(0) # Reshape to 1 x Fan-in x InputStates @@ -232,17 +269,29 @@ def table_lookup(self, connected_input: Tensor, input_perm_matrix: Tensor, bin_o if not (matches == torch.ones_like(matches,dtype=matches.dtype)).all(): raise Exception(f"One or more vectors in the input is not in the possible input state space") indices = torch.argmax(eq.type(torch.int64),dim=1) + if self.track_used_luts: + # TODO: vectorize this loop + for i in indices: + neuron_lut_histogram[i] += 1 return bin_output_states[indices] def lut_forward(self, x: Tensor) -> Tensor: if self.apply_input_quant: x = self.input_quant(x) # Use this to fetch the bin output of the input, if the input isn't already in binary format + # TODO: Put this in a child class(?) 
+ # TODO: Add support for non-uniform fan-in + if self.track_used_luts: + if self.used_luts_histogram is None: + self.used_luts_histogram = self.out_features * [None] + for i in range(self.out_features): + self.used_luts_histogram[i] = torch.zeros(size=(len(self.neuron_truth_tables[i][2]),), dtype=torch.int64) y = torch.zeros((x.shape[0],self.out_features)) # Perform table lookup for each neuron output for i in range(self.out_features): indices, input_perm_matrix, float_output_states, bin_output_states = self.neuron_truth_tables[i] + neuron_lut_histogram = self.used_luts_histogram[i] if self.track_used_luts else None connected_input = x[:,indices] - y[:,i] = self.table_lookup(connected_input, input_perm_matrix, bin_output_states) + y[:,i] = self.table_lookup(connected_input, input_perm_matrix, bin_output_states, neuron_lut_histogram=neuron_lut_histogram) return y def forward(self, x: Tensor) -> Tensor: diff --git a/src/logicnets/synthesis.py b/src/logicnets/synthesis.py index 8193a6369..4856640c4 100644 --- a/src/logicnets/synthesis.py +++ b/src/logicnets/synthesis.py @@ -14,10 +14,27 @@ import os import subprocess +import shutil from shutil import which +import glob + +from .abc import verilog_bench_to_aig,\ + txt_to_sim,\ + simulate_circuit,\ + putontop_aig,\ + putontop_blif,\ + optimize_bdd_network,\ + evaluate_accuracy,\ + tech_map_circuit,\ + iterative_mfs2_optimize,\ + pipeline_tech_mapped_circuit,\ + tech_map_to_verilog +from .verilog import generate_abc_verilog_wrapper,\ + fix_abc_module_name #xcvu9p-flgb2104-2-i # TODO: Add option to perform synthesis on a remote server +# Synthesise design with vivado and get resource counts def synthesize_and_get_resource_counts(verilog_dir, top_name, fpga_part = "xcku3p-ffva676-1-e", clk_name="clk", clk_period_ns=5.0, post_synthesis = 0): # old part : "xczu3eg-sbva484-1-i" # ensure that the OH_MY_XILINX envvar is set @@ -55,3 +72,104 @@ def synthesize_and_get_resource_counts(verilog_dir, top_name, fpga_part = "xcku3 
# ---------------------------------------------------------------------------
# src/logicnets/synthesis.py
# ---------------------------------------------------------------------------

def _train_sim_file(layer_idx):
    """Return the name of the simulation file that feeds layer `layer_idx`.

    Layer 0 reads the converted training inputs ("train.sim"); every later
    layer reads the file written by simulating the previous layer.
    """
    return "train.sim" if layer_idx == 0 else f"train{layer_idx}.sim"


def _layer_io_bits(module):
    """Return `(input_bits, output_bits)` of a single neuron of `module`.

    `input_bits` is the input bit width times the fan-in, `output_bits` the
    output bit width. The fan-in is read from the first neuron's truth table
    only, i.e. a uniform fan-in across the layer is assumed.
    """
    _, input_bitwidth = module.input_quant.get_scale_factor_bits()
    _, output_bitwidth = module.output_quant.get_scale_factor_bits()
    indices, _, _, _ = module.neuron_truth_tables[0]
    fanin = len(indices)
    return int(input_bitwidth * fanin), int(output_bitwidth)


# Optimize the design with ABC
def synthesize_and_get_resource_counts_with_abc(verilog_dir, module_list, pipeline_stages=0, freq_thresh=0, train_input_txt="train_input.txt", train_output_txt="train_output.txt", test_input_txt="test_input.txt", test_output_txt="test_output.txt", bdd_opt_cmd="lnetopt", verbose=False):
    """Optimize a generated LogicNets design with ABC and evaluate the result.

    An ABC project is created under `{verilog_dir}/abc` (sub-directories
    `ver`, `aig`, `blif`, `veropt`). The per-layer Verilog is converted to
    AIGs, each layer is simulated on the training inputs, optimized and
    technology mapped, the layers are stacked into one circuit, that circuit
    is optimized again and written out as Verilog together with a top-level
    wrapper, and finally its accuracy is measured on both data sets.

    Args:
        verilog_dir: Directory holding `layer{i}.v` for every module, the
            optional `logicnet.v`, any `*.bench` files and the four I/O text
            files named below.
        module_list: The layers of the network, in order. Each entry must
            provide `input_quant` / `output_quant` (with
            `get_scale_factor_bits()`) and `neuron_truth_tables`; the first
            and last entries also need `in_features` / `out_features`.
        pipeline_stages: 0 emits the circuit via `tech_map_to_verilog`; any
            other value is passed as the register count to
            `pipeline_tech_mapped_circuit` and to the wrapper generator.
        freq_thresh: Forwarded to `optimize_bdd_network`.
        train_input_txt, train_output_txt, test_input_txt, test_output_txt:
            File names (relative to `verilog_dir`) of the I/O vectors.
        bdd_opt_cmd: Forwarded to `optimize_bdd_network` as `opt_cmd`.
        verbose: Forwarded to every ABC helper.

    Returns:
        `(train_accuracy, test_accuracy, nodes, average_tt_pcts)` where
        `nodes` is the first value returned by the Verilog-generation step
        and `average_tt_pcts` holds one `tt_pct` per layer as returned by
        `optimize_bdd_network`.

    Raises:
        Exception: If the environment variable ABC_ROOT is not defined.
    """
    # Only the presence of ABC_ROOT is checked here; its value is not used in
    # this function (presumably the .abc helpers read it - verify there).
    if "ABC_ROOT" not in os.environ:
        raise Exception("The environment variable ABC_ROOT is not defined.")

    num_layers = len(module_list)

    # Create directories ready for processing with ABC
    project_prefix = "abc"
    abc_project_root = f"{verilog_dir}/{project_prefix}"
    verilog_bench_dir = f"{abc_project_root}/ver"
    aig_dir = f"{abc_project_root}/aig"
    blif_dir = f"{abc_project_root}/blif"
    veropt_dir = f"{abc_project_root}/veropt"
    for directory in (verilog_bench_dir, aig_dir, blif_dir, veropt_dir):
        os.makedirs(directory, exist_ok=True)

    # Fetch the right source files from the verilog directory
    source_files = (
        glob.glob(f"{verilog_dir}/logicnet.v")
        + [f"{verilog_dir}/layer{i}.v" for i in range(num_layers)]
        + glob.glob(f"{verilog_dir}/*.bench")
    )
    for src in source_files:
        shutil.copy(src, verilog_bench_dir)
    # Fetch the I/O files
    for io_name in (train_input_txt, train_output_txt, test_input_txt, test_output_txt):
        shutil.copy(f"{verilog_dir}/{io_name}", abc_project_root)

    # Preparation - model / I/O conversion
    # Convert txt inputs into the sim format
    txt_to_sim(train_input_txt, "train.sim", working_dir=abc_project_root, verbose=verbose)
    txt_to_sim(test_input_txt, "test.sim", working_dir=abc_project_root, verbose=verbose)

    # Create AIGs from verilog
    for i in range(num_layers):
        verilog_bench_to_aig(f"ver/layer{i}.v", f"aig/layer{i}.aig", working_dir=abc_project_root, verbose=verbose)

    # Simulate each layer; the output of layer i becomes the input of layer i+1
    for i in range(num_layers):
        simulate_circuit(f"aig/layer{i}.aig", _train_sim_file(i), f"train{i+1}.sim", working_dir=abc_project_root, verbose=verbose)

    # Synthesis
    average_tt_pcts = []
    for i, module in enumerate(module_list):
        input_bits, output_bits = _layer_io_bits(module)
        _, tt_pct, _, _, _ = optimize_bdd_network(f"aig/layer{i}.aig", f"aig/layer{i}_full.aig", input_bits, output_bits, freq_thresh, _train_sim_file(i), opt_cmd=bdd_opt_cmd, working_dir=abc_project_root, verbose=verbose)
        average_tt_pcts.append(tt_pct)

    # Technology mapping
    for i, module in enumerate(module_list):
        input_bits, output_bits = _layer_io_bits(module)
        tech_map_circuit(f"aig/layer{i}_full.aig", f"blif/layer{i}_full.blif", input_bits, output_bits, working_dir=abc_project_root, verbose=verbose)

    # Generate monolithic circuits
    if num_layers > 1:
        putontop_aig([f"aig/layer{i}_full.aig" for i in range(num_layers)], "aig/layers_full.aig", working_dir=abc_project_root, verbose=verbose)
        putontop_blif([f"blif/layer{i}_full.blif" for i in range(num_layers)], "blif/layers_full.blif", working_dir=abc_project_root, verbose=verbose)
    else:
        # A single layer already is the whole circuit
        shutil.copy(f"{aig_dir}/layer0_full.aig", f"{aig_dir}/layers_full.aig")
        shutil.copy(f"{blif_dir}/layer0_full.blif", f"{blif_dir}/layers_full.blif")

    # Generic logic synthesis optimizations
    iterative_mfs2_optimize(circuit_file="blif/layers_full.blif", output_file="blif/layers_full_opt.blif", tmp_file="blif/tmp.blif", max_loop=100, working_dir=abc_project_root, verbose=verbose)

    # Generate verilog, with or without pipelining
    if pipeline_stages == 0:
        nodes, _, _ = tech_map_to_verilog(circuit_file="blif/layers_full_opt.blif", output_verilog="veropt/layers_full_opt.v", working_dir=abc_project_root, verbose=verbose)
    else:
        # The register count is `pipeline_stages`; there is no separate
        # `num_registers` variable in this function.
        nodes, _, _ = pipeline_tech_mapped_circuit(circuit_file="blif/layers_full_opt.blif", output_verilog="veropt/layers_full_opt.v", num_registers=pipeline_stages, working_dir=abc_project_root, verbose=verbose)
    # Rewrite the file in place so the module is called "layers_full_opt"
    fix_abc_module_name(f"{veropt_dir}/layers_full_opt.v", f"{veropt_dir}/layers_full_opt.v", "\\aig", "layers_full_opt", add_timescale=True)

    # Generate top-level entity wrapper
    _, input_bitwidth = module_list[0].input_quant.get_scale_factor_bits()
    _, output_bitwidth = module_list[-1].output_quant.get_scale_factor_bits()
    input_bitwidth, output_bitwidth = int(input_bitwidth), int(output_bitwidth)
    total_input_bits = module_list[0].in_features * input_bitwidth
    total_output_bits = module_list[-1].out_features * output_bitwidth
    module_name = "logicnet"
    veropt_wrapper_str = generate_abc_verilog_wrapper(module_name=module_name, input_name="M0", input_bits=total_input_bits, output_name=f"M{num_layers}", output_bits=total_output_bits, submodule_name="layers_full_opt", num_registers=pipeline_stages)
    with open(f"{veropt_dir}/{module_name}.v", "w") as f:
        f.write(veropt_wrapper_str)

    # Evaluation
    # Training set:
    simulate_circuit("blif/layers_full_opt.blif", "train.sim", "train.simo", working_dir=abc_project_root, verbose=verbose)
    train_accuracy, _, _ = evaluate_accuracy("blif/layers_full_opt.blif", "train.simo", train_output_txt, output_bitwidth, working_dir=abc_project_root, verbose=verbose)
    # Test set:
    simulate_circuit("blif/layers_full_opt.blif", "test.sim", "test.simo", working_dir=abc_project_root, verbose=verbose)
    test_accuracy, _, _ = evaluate_accuracy("blif/layers_full_opt.blif", "test.simo", test_output_txt, output_bitwidth, working_dir=abc_project_root, verbose=verbose)

    return train_accuracy, test_accuracy, nodes, average_tt_pcts


# ---------------------------------------------------------------------------
# src/logicnets/verilog.py
# ---------------------------------------------------------------------------

def fix_abc_module_name(input_verilog_file, output_verilog_file, old_module_name, new_module_name, add_timescale: bool = False):
    """Copy a Verilog file, replacing the header line of one module.

    Every line containing `module {old_module_name}` is replaced by a fresh
    header line for `new_module_name`; all other lines are copied unchanged.
    The input is read completely before the output is opened, so both paths
    may refer to the same file.

    Args:
        input_verilog_file: Path of the Verilog file to read.
        output_verilog_file: Path of the Verilog file to write.
        old_module_name: Module name to look for.
        new_module_name: Module name to write instead.
        add_timescale: When true, a timescale directive is written first and
            the new header line starts the port list with `clock`.
    """
    with open(input_verilog_file, 'r') as f:
        lines = f.readlines()

    # NOTE(review): the `clock` port is tied to `add_timescale` rather than to
    # whether the circuit is pipelined - confirm this is intended.
    if add_timescale:
        new_header = f"module {new_module_name} (clock,\n"
    else:
        new_header = f"module {new_module_name} (\n"
    old_header = f"module {old_module_name}"

    with open(output_verilog_file, 'w') as f:
        if add_timescale:
            f.write("`timescale 1 ps / 1 ps\n")
        f.writelines(new_header if old_header in line else line for line in lines)


def _index_digits(count: int) -> int:
    """Return the number of decimal digits of the largest index below `count`."""
    return len(str(count - 1))


def generate_abc_verilog_wrapper(module_name: str, input_name: str, input_bits: int, output_name: str, output_bits: int, submodule_name: str, num_registers: int, add_timescale: bool = True):
    """Return Verilog for a wrapper that exposes a bit-level module as buses.

    The wrapper declares one input bus, `clk`, `rst` and one output bus, and
    instantiates `submodule_name`, connecting bus bit `i` to port `pi<i>` /
    `po<i>`. Port indices are zero-padded to the width of the largest index
    (e.g. `pi00` .. `pi99` for 100 inputs).

    Args:
        module_name: Name of the generated wrapper module.
        input_name: Name of the input bus.
        input_bits: Width of the input bus; must be at least 1.
        output_name: Name of the output bus.
        output_bits: Width of the output bus; must be at least 1.
        submodule_name: Name of the module to instantiate.
        num_registers: When greater than 0, `clk` is connected to the
            submodule's `clock` port.
        add_timescale: Whether to start the text with a timescale directive.

    Returns:
        The wrapper source as a string.

    Raises:
        ValueError: If `input_bits` or `output_bits` is smaller than 1.
    """
    if input_bits < 1 or output_bits < 1:
        raise ValueError(f"input_bits and output_bits must be at least 1 (got {input_bits} and {output_bits})")

    abc_wrapper_template = """\
{timescale}
module {module_name} (input [{input_bits_1:d}:0] {input_name}, input clk, input rst, output[{output_bits_1:d}:0] {output_name});
{module_contents}
endmodule\n"""
    # Integer digit counting; avoids floating-point log10 and its -inf at 0.
    input_digits = _index_digits(input_bits)
    output_digits = _index_digits(output_bits)

    module_contents = [f"{submodule_name} {submodule_name}_inst ("]
    # Connect inputs
    if num_registers > 0:
        module_contents.append(" .clock(clk),")
    for i in range(input_bits):
        module_contents.append(f" .pi{i:0{input_digits}d}({input_name}[{i}]),")
    # Connect outputs; the last connection must not be followed by a comma
    for i in range(output_bits):
        separator = "," if i < output_bits - 1 else ""
        module_contents.append(f" .po{i:0{output_digits}d}({output_name}[{i}]){separator}")
    module_contents.append(" );\n")

    return abc_wrapper_template.format(
        module_name=module_name,
        input_name=input_name,
        input_bits_1=input_bits - 1,
        output_name=output_name,
        output_bits_1=output_bits - 1,
        module_contents="\n".join(module_contents),
        timescale="`timescale 1 ps / 1 ps" if add_timescale else "",
    )