diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index f0c95ff31..0467a8fa4 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -29,7 +29,7 @@ RUN apt-get -qq update && apt-get -qq -y install curl bzip2 \ && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log # Install LogicNets system prerequisites -RUN apt-get -qq update && apt-get -qq -y install verilator build-essential libx11-6 git \ +RUN apt-get -qq update && apt-get -qq -y install verilator build-essential libx11-6 git libreadline-dev \ && apt-get autoclean \ && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log @@ -41,6 +41,16 @@ ENV OHMYXILINX=/workspace/oh-my-xilinx RUN git clone https://github.com/dirjud/Nitro-Parts-lib-Xilinx.git ENV NITROPARTSLIB=/workspace/Nitro-Parts-lib-Xilinx +# Adding LogicNets dependency on ABC +COPY examples/mnist/abc.patch /workspace/ +RUN git clone https://github.com/berkeley-abc/abc.git \ + && cd abc \ + && git checkout 813a0f1ff1ae7512cb7947f54cd3f2ab252848c8 \ + && git apply /workspace/abc.patch \ + && rm -f /workspace/abc.patch \ + && make -j`nproc` +ENV ABC_ROOT=/workspace/abc + # Create the user account to run LogicNets RUN groupadd -g $GID $GNAME RUN useradd -m -u $UID $UNAME -g $GNAME diff --git a/examples/cybersecurity/dump_luts.py b/examples/cybersecurity/dump_luts.py new file mode 100644 index 000000000..01a57663f --- /dev/null +++ b/examples/cybersecurity/dump_luts.py @@ -0,0 +1,119 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+from argparse import ArgumentParser
+
+import torch
+from torch.utils.data import DataLoader
+
+from logicnets.nn import generate_truth_tables, \
+                         lut_inference, \
+                         save_luts, \
+                         module_list_to_verilog_module
+
+from train import configs, model_config, dataset_config, other_options, test
+from dataset import get_preqnt_dataset
+from models import UnswNb15NeqModel, UnswNb15LutModel
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Generate histograms of states used throughout LogicNets")
+    parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s",
+                        help="Specify the neural network model to use (default: %(default)s)")
+    parser.add_argument('--batch-size', type=int, default=None, metavar='N',
+                        help="Batch size for evaluation (default: %(default)s)")
+    parser.add_argument('--input-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-bitwidth', type=int, default=None,
+                        help="Bitwidth to use for activations in hidden layers (default: %(default)s)")
+    parser.add_argument('--output-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the output (default: %(default)s)")
+    parser.add_argument('--input-fanin', type=int, default=None,
+                        help="Fanin to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-fanin', type=int, default=None,
+                        help="Fanin to use for the hidden layers (default: %(default)s)")
+    parser.add_argument('--output-fanin', type=int, default=None,
+                        help="Fanin to use at the output (default: %(default)s)")
+    parser.add_argument('--hidden-layers', nargs='+', type=int, default=None,
+                        help="A list of hidden layer neuron sizes (default: %(default)s)")
+    parser.add_argument('--dataset-file', type=str, default='data/unsw_nb15_binarized.npz',
+                        help="The file to use as the dataset input (default: %(default)s)")
+    parser.add_argument('--log-dir', type=str, default='./log',
+                        help="A location to store the calculated histograms (default: %(default)s)")
+    parser.add_argument('--checkpoint', type=str, required=True,
+                        help="The checkpoint file which contains the model weights")
+    args = parser.parse_args()
+    defaults = configs[args.arch]
+    options = vars(args)
+    del options['arch']
+    config = {}
+    for k in options.keys():
+        config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified.
+
+    if not os.path.exists(config['log_dir']):
+        os.makedirs(config['log_dir'])
+
+    # Split up configuration options to be more understandable
+    model_cfg = {}
+    for k in model_config.keys():
+        model_cfg[k] = config[k]
+    dataset_cfg = {}
+    for k in dataset_config.keys():
+        dataset_cfg[k] = config[k]
+    options_cfg = {}
+    for k in other_options.keys():
+        if k == 'cuda':
+            continue
+        options_cfg[k] = config[k]
+
+    # Fetch the test set
+    dataset = {}
+    dataset['train'] = get_preqnt_dataset(dataset_cfg['dataset_file'], split='train')
+    train_loader = DataLoader(dataset["train"], batch_size=config['batch_size'], shuffle=False)
+
+    # Instantiate the PyTorch model
+    x, y = dataset['train'][0]
+    dataset_length = len(dataset['train'])
+    model_cfg['input_length'] = len(x)
+    model_cfg['output_length'] = 1
+    model = UnswNb15NeqModel(model_cfg)
+
+    # Load the model weights
+    checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu')
+    model.load_state_dict(checkpoint['model_dict'])
+
+    # Test the PyTorch model
+    print("Running inference of baseline model on training set (%d examples)..." % (dataset_length))
+    model.eval()
+    baseline_accuracy = test(model, train_loader, cuda=False)
+    print("Baseline accuracy: %f" % (baseline_accuracy))
+
+    # Instantiate LUT-based model
+    lut_model = UnswNb15LutModel(model_cfg)
+    lut_model.load_state_dict(checkpoint['model_dict'])
+
+    # Generate the truth tables in the LUT module
+    print("Converting NEQs to LUTs...")
+    generate_truth_tables(lut_model, verbose=True)
+
+    # Test the LUT-based model
+    print("Running inference of LUT-based model on training set (%d examples)..." % (dataset_length))
+    lut_inference(lut_model, track_used_luts=True)
+    lut_model.eval()
+    lut_accuracy = test(lut_model, train_loader, cuda=False)
+    print("LUT-Based Model accuracy: %f" % (lut_accuracy))
+    print("Saving LUTs to %s... " % (options_cfg["log_dir"] + "/luts.pth"))
+    save_luts(lut_model, options_cfg["log_dir"] + "/luts.pth")
+    print("Done!")
+
diff --git a/examples/cybersecurity/models.py b/examples/cybersecurity/models.py
index b98ab5dc9..bfbaf2ca5 100644
--- a/examples/cybersecurity/models.py
+++ b/examples/cybersecurity/models.py
@@ -63,13 +63,15 @@ def __init__(self, model_config):
         self.verilog_dir = None
         self.top_module_filename = None
         self.dut = None
+        self.verify = True
         self.logfile = None
 
-    def verilog_inference(self, verilog_dir, top_module_filename, logfile: bool = False, add_registers: bool = False):
+    def verilog_inference(self, verilog_dir, top_module_filename, logfile: bool = False, add_registers: bool = False, verify: bool = True):
         self.verilog_dir = realpath(verilog_dir)
         self.top_module_filename = top_module_filename
-        self.dut = PyVerilator.build(f"{self.verilog_dir}/{self.top_module_filename}", verilog_path=[self.verilog_dir], build_dir=f"{self.verilog_dir}/verilator")
+        self.dut = PyVerilator.build(f"{self.verilog_dir}/{self.top_module_filename}", verilog_path=[self.verilog_dir], build_dir=f"{self.verilog_dir}/verilator", command_args=("--x-assign","0",))
         self.is_verilog_inference = True
+        self.verify = verify
         self.logfile = logfile
         if add_registers:
             self.latency = len(self.num_neurons)
@@ -95,11 +97,8 @@ def verilog_forward(self, x):
         self.dut.io.clk = 0
         for i in range(x.shape[0]):
             x_i = x[i,:]
-            y_i = self.pytorch_forward(x[i:i+1,:])[0]
             xv_i = list(map(lambda z: input_quant.get_bin_str(z), x_i))
-            ys_i = list(map(lambda z: output_quant.get_bin_str(z), y_i))
             xvc_i = reduce(lambda a,b: a+b, xv_i[::-1])
-            ysc_i = reduce(lambda a,b: a+b, ys_i[::-1])
             self.dut["M0"] = int(xvc_i, 2)
             for j in range(self.latency + 1):
                 #print(self.dut.io.M5)
                 result = f"{res:0{int(total_output_bits)}b}"
                 self.dut.io.clk = 1
                 self.dut.io.clk = 0
-            expected = f"{int(ysc_i,2):0{int(total_output_bits)}b}"
             result = f"{res:0{int(total_output_bits)}b}"
-            assert(expected == result)
+            if self.verify:
+                y_i = self.pytorch_forward(x[i:i+1,:])[0]
+                ys_i = list(map(lambda z: output_quant.get_bin_str(z), y_i))
+                ysc_i = reduce(lambda a,b: a+b, ys_i[::-1])
+                expected = f"{int(ysc_i,2):0{int(total_output_bits)}b}"
+                assert(expected == result)
             res_split = [result[i:i+output_bitwidth] for i in range(0, len(result), output_bitwidth)][::-1]
             yv_i = torch.Tensor(list(map(lambda z: int(z, 2), res_split)))
             y[i,:] = yv_i
diff --git a/examples/cybersecurity/neq2lut.py b/examples/cybersecurity/neq2lut.py
index 4302ec304..bcc7ef049 100644
--- a/examples/cybersecurity/neq2lut.py
+++ b/examples/cybersecurity/neq2lut.py
@@ -20,7 +20,8 @@
 from logicnets.nn import generate_truth_tables, \
                          lut_inference, \
-                         module_list_to_verilog_module
+                         module_list_to_verilog_module, \
+                         load_histograms
 from logicnets.synthesis import synthesize_and_get_resource_counts
 from logicnets.util import proc_postsynth_file
@@ -34,6 +35,8 @@
     "checkpoint": None,
     "generate_bench": False,
     "add_registers": False,
+    "histograms": None,
+    "freq_thresh": None,
     "simulate_pre_synthesis_verilog": False,
     "simulate_post_synthesis_verilog": False,
 }
@@ -68,6 +71,10 @@
                         help="A location to store the log output of the training run and the output model (default: %(default)s)")
     parser.add_argument('--checkpoint', type=str, required=True,
                         help="The checkpoint file which contains the model weights")
+    parser.add_argument('--histograms', type=str, default=None,
+                        help="A checkpoint file containing histograms of LUT usage (default: %(default)s)")
+    parser.add_argument('--freq-thresh', type=int, default=None,
+                        help="Frequency threshold for including a truth table entry in the model (default: %(default)s)")
     parser.add_argument('--generate-bench', action='store_true', default=False,
                         help="Generate the truth table in BENCH format as well as verilog (default: %(default)s)")
     parser.add_argument('--dump-io', action='store_true', default=False,
@@ -141,9 +148,12 @@
          'test_accuracy': lut_accuracy}
     torch.save(modelSave, options_cfg["log_dir"] + "/lut_based_model.pth")
 
+    if options_cfg["histograms"] is not None:
+        luts = torch.load(options_cfg["histograms"])
+        load_histograms(lut_model, luts)
     print("Generating verilog in %s..." % (options_cfg["log_dir"]))
-    module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=options_cfg["generate_bench"], add_registers=options_cfg["add_registers"])
+    module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=options_cfg["generate_bench"], add_registers=options_cfg["add_registers"], freq_thresh=options_cfg["freq_thresh"])
     print("Top level entity stored at: %s/logicnet.v ..." % (options_cfg["log_dir"]))
 
     if args.dump_io:
@@ -154,9 +164,10 @@
     else:
         io_filename = None
 
+
     if args.simulate_pre_synthesis_verilog:
         print("Running inference simulation of Verilog-based model...")
-        lut_model.verilog_inference(options_cfg["log_dir"], "logicnet.v", logfile=io_filename, add_registers=options_cfg["add_registers"])
+        lut_model.verilog_inference(options_cfg["log_dir"], "logicnet.v", logfile=io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0)
         verilog_accuracy = test(lut_model, test_loader, cuda=False)
         print("Verilog-Based Model accuracy: %f" % (verilog_accuracy))
 
@@ -166,7 +177,7 @@
     if args.simulate_post_synthesis_verilog:
         print("Running post-synthesis inference simulation of Verilog-based model...")
         proc_postsynth_file(options_cfg["log_dir"])
-        lut_model.verilog_inference(options_cfg["log_dir"]+"/post_synth", "logicnet_post_synth.v", io_filename, add_registers=options_cfg["add_registers"])
+        lut_model.verilog_inference(options_cfg["log_dir"]+"/post_synth", "logicnet_post_synth.v", io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0)
         post_synth_accuracy = test(lut_model, test_loader, cuda=False)
         print("Post-synthesis Verilog-Based Model accuracy: %f" % (post_synth_accuracy))
diff --git a/examples/cybersecurity/train.py b/examples/cybersecurity/train.py
index 3576ae15a..30dcba634 100644
--- a/examples/cybersecurity/train.py
+++ b/examples/cybersecurity/train.py
@@ -44,6 +44,8 @@
         "learning_rate": 1e-1,
         "seed": 25,
         "checkpoint": None,
+        "histograms": None,
+        "freq_thresh": None,
     },
     "nid-s-comp": {
         "hidden_layers": [49, 7],
@@ -59,6 +61,8 @@
         "learning_rate": 1e-1,
         "seed": 81,
         "checkpoint": None,
+        "histograms": None,
+        "freq_thresh": None,
     },
     "nid-m": {
         "hidden_layers": [593, 256, 128, 128],
@@ -74,6 +78,8 @@
         "learning_rate": 1e-1,
         "seed": 20,
         "checkpoint": None,
+        "histograms": None,
+        "freq_thresh": None,
     },
     "nid-m-comp": {
         "hidden_layers": [593, 256, 49, 7],
@@ -89,6 +95,8 @@
         "learning_rate": 1e-1,
         "seed": 40,
         "checkpoint": None,
+        "histograms": None,
+        "freq_thresh": None,
     },
     "nid-l": {
         "hidden_layers": [593, 100, 100, 100],
@@ -104,6 +112,8 @@
         "learning_rate": 1e-1,
         "seed": 2,
         "checkpoint": None,
+        "histograms": None,
+        "freq_thresh": None,
     },
     "nid-l-comp": {
         "hidden_layers": [593, 100, 25, 5],
@@ -119,6 +129,8 @@
         "learning_rate": 1e-1,
         "seed": 83,
         "checkpoint": None,
+        "histograms": None,
+        "freq_thresh": None,
     },
 }
diff --git a/examples/jet_substructure/README.md b/examples/jet_substructure/README.md
index d9a544d55..2b27d8806 100644
--- a/examples/jet_substructure/README.md
+++ b/examples/jet_substructure/README.md
@@ -72,3 +72,42 @@ our paper below:
 }
 ```
+## Testing BLIF Files on the JSC Dataset
+
+In this section, we show how to take technology-mapped BLIF files,
+generate technology-mapped verilog, and simulate that verilog on the JSC dataset.
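+
+The BLIF files themselves are produced by the ABC-based synthesis flow. As a
+rough sketch (the `<...>` placeholders and the `jsc-m` architecture are examples,
+not fixed values; the train/test I/O text files are assumed to already be present
+in `<log-dir>`), they can be generated from a trained checkpoint as follows:
+
+```bash
+# Hypothetical invocation; see neq2lut_abc.py for the full set of options.
+python neq2lut_abc.py --arch jsc-m --checkpoint <checkpoint.pth> --log-dir <log-dir>
+```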
+
+### Convert BLIF Files into Verilog
+
+To convert the full BLIF files (as generated from the LogicNets examples, via `neq2lut_abc.py`) into verilog, run the following:
+
+```bash
+python blif2verilog.py --arch <arch> --input-blifs <blif-dir>/layers_full_opt.blif --output-directory <output-dir>
+```
+
+To convert the layer-wise BLIF files into verilog, run the following:
+
+```bash
+python blif2verilog.py --arch <arch> --input-blifs <blif-dir>/*.blif --output-directory <output-dir> --generated-module-name-prefix layer0
+```
+
+Note that the generated module name prefix will likely have to change if the source files are handled in a different way.
+
+### Simulate Verilog
+
+The resultant verilog can be simulated as follows:
+
+```bash
+python simulate_verilog.py --arch <arch> --checkpoint <checkpoint.pth> --input-verilog <output-dir>/logicnet.v
+```
+
diff --git a/examples/jet_substructure/blif2verilog.py b/examples/jet_substructure/blif2verilog.py
new file mode 100644
index 000000000..929de9610
--- /dev/null
+++ b/examples/jet_substructure/blif2verilog.py
@@ -0,0 +1,141 @@
+# Copyright (C) 2022 Xilinx, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# A script to convert technology-mapped BLIF files to technology-mapped verilog.
+
+import os
+import glob
+import shutil
+from argparse import ArgumentParser
+
+import torch
+from torch.utils.data import DataLoader
+
+from logicnets.abc import tech_map_to_verilog,\
+                          putontop_blif,\
+                          pipeline_tech_mapped_circuit
+from logicnets.verilog import fix_abc_module_name,\
+                              generate_abc_verilog_wrapper
+
+from train import configs, model_config, dataset_config, test
+from models import JetSubstructureNeqModel
+from dataset import JetSubstructureDataset
+
+other_options = {
+    "output_directory": None,
+    "input_blifs": None,
+    "num_registers": None,
+    "generated_module_name_prefix": None,
+}
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Convert technology-mapped BLIF files into a technology-mapped verilog file, using ABC")
+    parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s",
+                        help="Specify the neural network model to use (default: %(default)s)")
+    parser.add_argument('--input-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-bitwidth', type=int, default=None,
+                        help="Bitwidth to use for activations in hidden layers (default: %(default)s)")
+    parser.add_argument('--output-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the output (default: %(default)s)")
+    parser.add_argument('--input-fanin', type=int, default=None,
+                        help="Fanin to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-fanin', type=int, default=None,
+                        help="Fanin to use for the hidden layers (default: %(default)s)")
+    parser.add_argument('--output-fanin', type=int, default=None,
+                        help="Fanin to use at the output (default: %(default)s)")
+    parser.add_argument('--hidden-layers', nargs='+', type=int, default=None,
+                        help="A list of hidden layer neuron sizes (default: %(default)s)")
+    parser.add_argument('--dataset-file', type=str, default='data/processed-pythia82-lhc13-all-pt1-50k-r1_h022_e0175_t220_nonu_truth.z',
+                        help="The file to use as the dataset input (default: %(default)s)")
+    parser.add_argument('--dataset-config', type=str, default='config/yaml_IP_OP_config.yml',
+                        help="The file to use to configure the input dataset (default: %(default)s)")
+    parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'],
+                        help="Dataset to use for evaluation (default: %(default)s)")
+    parser.add_argument('--input-blifs', nargs='+', type=str, required=True,
+                        help="The input BLIF files")
+    parser.add_argument('--output-directory', type=str, default='./log',
+                        help="The directory in which the generated verilog is stored (default: %(default)s)")
+    parser.add_argument('--num-registers', type=int, default=0,
+                        help="The number of registers to add to the generated verilog (default: %(default)s)")
+    parser.add_argument('--generated-module-name-prefix', type=str, default='\\aig',
+                        help="A prefix which matches the module name in the generated verilog, but no other line of code (default: %(default)s)")
+    args = parser.parse_args()
+    defaults = configs[args.arch]
+    options = vars(args)
+    del options['arch']
+    config = {}
+    for k in options.keys():
+        config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified.
+
+    if not os.path.exists(args.output_directory):
+        os.makedirs(args.output_directory)
+
+    # Split up configuration options to be more understandable
+    model_cfg = {}
+    for k in model_config.keys():
+        model_cfg[k] = config[k]
+    dataset_cfg = {}
+    for k in dataset_config.keys():
+        dataset_cfg[k] = config[k]
+    options_cfg = {}
+    for k in other_options.keys():
+        options_cfg[k] = config[k]
+
+    # Fetch the test set
+    dataset = {}
+    dataset[args.dataset_split] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split=args.dataset_split)
+    test_loader = DataLoader(dataset[args.dataset_split], batch_size=1, shuffle=False)
+
+    # Instantiate the PyTorch model
+    x, y = dataset[args.dataset_split][0]
+    model_cfg['input_length'] = len(x)
+    model_cfg['output_length'] = len(y)
+    model = JetSubstructureNeqModel(model_cfg)
+
+    abc_project_root = options_cfg['output_directory']
+    veropt_dir = options_cfg['output_directory']
+    input_blif = "layers_full_opt.blif"
+    verbose = False
+
+    if len(options_cfg['input_blifs']) > 1:
+        nodes, out, err = putontop_blif([os.path.realpath(blif) for blif in options_cfg['input_blifs']], input_blif, working_dir=abc_project_root, verbose=verbose)
+    else:
+        shutil.copy(os.path.realpath(options_cfg['input_blifs'][0]), f"{abc_project_root}/{input_blif}")
+
+    if options_cfg['num_registers'] == 0:
+        nodes, out, err = tech_map_to_verilog(circuit_file=input_blif, output_verilog=f"layers_full_opt.v", working_dir=abc_project_root, verbose=verbose)
+    else:
+        nodes, out, err = pipeline_tech_mapped_circuit(circuit_file=input_blif, output_verilog=f"layers_full_opt.v", num_registers=options_cfg['num_registers'], working_dir=abc_project_root, verbose=verbose)
+
+    # Fix the resultant verilog file so that it can be simulated
+    fix_abc_module_name(f"{veropt_dir}/layers_full_opt.v", f"{veropt_dir}/layers_full_opt.v", options_cfg["generated_module_name_prefix"], "layers_full_opt", add_timescale=options_cfg['num_registers'] != 0)
+
+    # Generate top-level entity wrapper
+    module_list = model.module_list
+    _, input_bitwidth = module_list[0].input_quant.get_scale_factor_bits()
+    _,
output_bitwidth = module_list[-1].output_quant.get_scale_factor_bits() + input_bitwidth, output_bitwidth = int(input_bitwidth), int(output_bitwidth) + total_input_bits = module_list[0].in_features*input_bitwidth + total_output_bits = module_list[-1].out_features*output_bitwidth + module_name="logicnet" + veropt_wrapper_str = generate_abc_verilog_wrapper(module_name=module_name, input_name="M0", input_bits=total_input_bits, output_name=f"M{len(module_list)}", output_bits=total_output_bits, submodule_name="layers_full_opt", num_registers=options_cfg['num_registers']) + with open(f"{veropt_dir}/{module_name}.v", "w") as f: + f.write(veropt_wrapper_str) + + print(f"Adding Nitro-Parts-Lib to {veropt_dir}") + source_files = glob.glob(f"{os.environ['NITROPARTSLIB']}/*.v") + for f in source_files: + shutil.copy(f, f"{veropt_dir}") + diff --git a/examples/jet_substructure/config/yaml_IP_OP_config.yml b/examples/jet_substructure/config/yaml_IP_OP_config.yml index e238039bf..95befe1fe 100644 --- a/examples/jet_substructure/config/yaml_IP_OP_config.yml +++ b/examples/jet_substructure/config/yaml_IP_OP_config.yml @@ -45,5 +45,5 @@ L1Reg: 0.0001 NormalizeInputs: 1 InputType: Dense ApplyPca: false -PcaDimensions: 10 +PcaDimensions: 12 diff --git a/examples/jet_substructure/dump_luts.py b/examples/jet_substructure/dump_luts.py new file mode 100644 index 000000000..c5acfb8b2 --- /dev/null +++ b/examples/jet_substructure/dump_luts.py @@ -0,0 +1,124 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+from argparse import ArgumentParser
+
+import torch
+from torch.utils.data import DataLoader
+
+from logicnets.nn import generate_truth_tables, \
+                         lut_inference, \
+                         save_luts, \
+                         module_list_to_verilog_module
+
+from train import configs, model_config, dataset_config, other_options, test
+from dataset import JetSubstructureDataset
+from models import JetSubstructureNeqModel, JetSubstructureLutModel
+from logicnets.synthesis import synthesize_and_get_resource_counts
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Generate histograms of states used throughout LogicNets")
+    parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s",
+                        help="Specify the neural network model to use (default: %(default)s)")
+    parser.add_argument('--batch-size', type=int, default=None, metavar='N',
+                        help="Batch size for evaluation (default: %(default)s)")
+    parser.add_argument('--input-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-bitwidth', type=int, default=None,
+                        help="Bitwidth to use for activations in hidden layers (default: %(default)s)")
+    parser.add_argument('--output-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the output (default: %(default)s)")
+    parser.add_argument('--input-fanin', type=int, default=None,
+                        help="Fanin to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-fanin', type=int, default=None,
+                        help="Fanin to use for the hidden layers (default: %(default)s)")
+    parser.add_argument('--output-fanin', type=int, default=None,
+                        help="Fanin to use at the output (default: %(default)s)")
+    parser.add_argument('--hidden-layers', nargs='+', type=int, default=None,
+                        help="A list of hidden layer neuron sizes (default: %(default)s)")
+    parser.add_argument('--dataset-file', type=str, default='data/processed-pythia82-lhc13-all-pt1-50k-r1_h022_e0175_t220_nonu_truth.z',
+                        help="The file to use as the dataset input (default: %(default)s)")
+    parser.add_argument('--dataset-config', type=str, default='config/yaml_IP_OP_config.yml',
+                        help="The file to use to configure the input dataset (default: %(default)s)")
+    parser.add_argument('--log-dir', type=str, default='./log',
+                        help="A location to store the calculated histograms (default: %(default)s)")
+    parser.add_argument('--checkpoint', type=str, required=True,
+                        help="The checkpoint file which contains the model weights")
+    args = parser.parse_args()
+    defaults = configs[args.arch]
+    options = vars(args)
+    del options['arch']
+    config = {}
+    for k in options.keys():
+        config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified.
+
+    if not os.path.exists(config['log_dir']):
+        os.makedirs(config['log_dir'])
+
+    # Split up configuration options to be more understandable
+    model_cfg = {}
+    for k in model_config.keys():
+        model_cfg[k] = config[k]
+    dataset_cfg = {}
+    for k in dataset_config.keys():
+        dataset_cfg[k] = config[k]
+    options_cfg = {}
+    for k in other_options.keys():
+        if k == 'cuda':
+            continue
+        options_cfg[k] = config[k]
+
+    # Fetch the test set
+    dataset = {}
+    dataset['train'] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split="train")
+    train_loader = DataLoader(dataset["train"], batch_size=config['batch_size'], shuffle=False)
+
+    # Instantiate the PyTorch model
+    x, y = dataset['train'][0]
+    dataset_length = len(dataset['train'])
+    model_cfg['input_length'] = len(x)
+    model_cfg['output_length'] = len(y)
+    model = JetSubstructureNeqModel(model_cfg)
+
+    # Load the model weights
+    checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu')
+    model.load_state_dict(checkpoint['model_dict'])
+
+    # Test the PyTorch model
+    print("Running inference of baseline model on training set (%d examples)..." % (dataset_length))
+    model.eval()
+    baseline_accuracy, baseline_avg_roc_auc = test(model, train_loader, cuda=False)
+    print("Baseline accuracy: %f" % (baseline_accuracy))
+    print("Baseline AVG ROC AUC: %f" % (baseline_avg_roc_auc))
+
+    # Instantiate LUT-based model
+    lut_model = JetSubstructureLutModel(model_cfg)
+    lut_model.load_state_dict(checkpoint['model_dict'])
+
+    # Generate the truth tables in the LUT module
+    print("Converting NEQs to LUTs...")
+    generate_truth_tables(lut_model, verbose=True)
+
+    # Test the LUT-based model
+    print("Running inference of LUT-based model on training set (%d examples)..." % (dataset_length))
+    lut_inference(lut_model, track_used_luts=True)
+    lut_model.eval()
+    lut_accuracy, lut_avg_roc_auc = test(lut_model, train_loader, cuda=False)
+    print("LUT-Based Model accuracy: %f" % (lut_accuracy))
+    print("LUT-Based AVG ROC AUC: %f" % (lut_avg_roc_auc))
+    print("Saving LUTs to %s... 
" % (options_cfg["log_dir"] + "/luts.pth")) + save_luts(lut_model, options_cfg["log_dir"] + "/luts.pth") + print("Done!") + diff --git a/examples/jet_substructure/models.py b/examples/jet_substructure/models.py index 4d35a86f8..806f485d6 100644 --- a/examples/jet_substructure/models.py +++ b/examples/jet_substructure/models.py @@ -65,13 +65,15 @@ def __init__(self, model_config): self.verilog_dir = None self.top_module_filename = None self.dut = None + self.verify = True self.logfile = None - def verilog_inference(self, verilog_dir, top_module_filename, logfile: bool = False, add_registers: bool = False): + def verilog_inference(self, verilog_dir, top_module_filename, logfile: bool = False, add_registers: bool = False, verify: bool = True): self.verilog_dir = realpath(verilog_dir) self.top_module_filename = top_module_filename - self.dut = PyVerilator.build(f"{self.verilog_dir}/{self.top_module_filename}", verilog_path=[self.verilog_dir], build_dir=f"{self.verilog_dir}/verilator") + self.dut = PyVerilator.build(f"{self.verilog_dir}/{self.top_module_filename}", verilog_path=[self.verilog_dir], build_dir=f"{self.verilog_dir}/verilator", command_args=("--x-assign","0",)) self.is_verilog_inference = True + self.verify = verify self.logfile = logfile if add_registers: self.latency = len(self.num_neurons) @@ -97,11 +99,8 @@ def verilog_forward(self, x): self.dut.io.clk = 0 for i in range(x.shape[0]): x_i = x[i,:] - y_i = self.pytorch_forward(x[i:i+1,:])[0] xv_i = list(map(lambda z: input_quant.get_bin_str(z), x_i)) - ys_i = list(map(lambda z: output_quant.get_bin_str(z), y_i)) xvc_i = reduce(lambda a,b: a+b, xv_i[::-1]) - ysc_i = reduce(lambda a,b: a+b, ys_i[::-1]) self.dut["M0"] = int(xvc_i, 2) for j in range(self.latency + 1): #print(self.dut.io.M5) @@ -109,9 +108,13 @@ def verilog_forward(self, x): result = f"{res:0{int(total_output_bits)}b}" self.dut.io.clk = 1 self.dut.io.clk = 0 - expected = f"{int(ysc_i,2):0{int(total_output_bits)}b}" result = f"{res:0{int(total_output_bits)}b}" - assert(expected == result) + if self.verify: + y_i = self.pytorch_forward(x[i:i+1,:])[0] + ys_i = list(map(lambda z: output_quant.get_bin_str(z), y_i)) + ysc_i = reduce(lambda a,b: a+b, ys_i[::-1]) + expected = f"{int(ysc_i,2):0{int(total_output_bits)}b}" + assert(expected == result) res_split = [result[i:i+output_bitwidth] for i in range(0, len(result), output_bitwidth)][::-1] yv_i = torch.Tensor(list(map(lambda z: int(z, 2), res_split))) y[i,:] = yv_i diff --git a/examples/jet_substructure/neq2lut.py b/examples/jet_substructure/neq2lut.py index 983376b4c..db84a7564 100644 --- a/examples/jet_substructure/neq2lut.py +++ b/examples/jet_substructure/neq2lut.py @@ -20,7 +20,8 @@ from logicnets.nn import generate_truth_tables, \ lut_inference, \ - module_list_to_verilog_module + module_list_to_verilog_module, \ + load_histograms from train import configs, model_config, dataset_config, test from dataset import JetSubstructureDataset @@ -34,6 +35,8 @@ "checkpoint": None, "generate_bench": False, "add_registers": False, + "histograms": None, + "freq_thresh": None, "simulate_pre_synthesis_verilog": False, "simulate_post_synthesis_verilog": False, } @@ -70,6 +73,10 @@ help="A location to store the log output of the training run and the output model (default: %(default)s)") parser.add_argument('--checkpoint', type=str, required=True, help="The checkpoint file which contains the model weights") + parser.add_argument('--histograms', type=str, default=None, + help="The checkpoint histograms of LUT usage (default: 
%(default)s)") + parser.add_argument('--freq-thresh', type=int, default=None, + help="Threshold to use to include this truth table into the model (default: %(default)s)") parser.add_argument('--generate-bench', action='store_true', default=False, help="Generate the truth table in BENCH format as well as verilog (default: %(default)s)") parser.add_argument('--dump-io', action='store_true', default=False, @@ -146,9 +153,12 @@ 'test_avg_roc_auc': lut_avg_roc_auc} torch.save(modelSave, options_cfg["log_dir"] + "/lut_based_model.pth") + if options_cfg["histograms"] is not None: + luts = torch.load(options_cfg["histograms"]) + load_histograms(lut_model, luts) print("Generating verilog in %s..." % (options_cfg["log_dir"])) - module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=options_cfg["generate_bench"], add_registers=options_cfg["add_registers"]) + module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=options_cfg["generate_bench"], add_registers=options_cfg["add_registers"], freq_thresh=options_cfg["freq_thresh"]) print("Top level entity stored at: %s/logicnet.v ..." % (options_cfg["log_dir"])) if args.dump_io: @@ -161,7 +171,7 @@ if args.simulate_pre_synthesis_verilog: print("Running inference simulation of Verilog-based model...") - lut_model.verilog_inference(options_cfg["log_dir"], "logicnet.v", logfile=io_filename, add_registers=options_cfg["add_registers"]) + lut_model.verilog_inference(options_cfg["log_dir"], "logicnet.v", logfile=io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0) verilog_accuracy, verilog_avg_roc_auc = test(lut_model, test_loader, cuda=False) print("Verilog-Based Model accuracy: %f" % (verilog_accuracy)) print("Verilog-Based AVG ROC AUC: %f" % (verilog_avg_roc_auc)) @@ -172,7 +182,7 @@ if args.simulate_post_synthesis_verilog: print("Running post-synthesis inference simulation of Verilog-based model...") proc_postsynth_file(options_cfg["log_dir"]) - lut_model.verilog_inference(options_cfg["log_dir"]+"/post_synth", "logicnet_post_synth.v", io_filename, add_registers=options_cfg["add_registers"]) + lut_model.verilog_inference(options_cfg["log_dir"]+"/post_synth", "logicnet_post_synth.v", io_filename, add_registers=options_cfg["add_registers"], verify=options_cfg["freq_thresh"] is None or options_cfg["freq_thresh"] == 0) post_synth_accuracy, post_synth_avg_roc_auc = test(lut_model, test_loader, cuda=False) print("Post-synthesis Verilog-Based Model accuracy: %f" % (post_synth_accuracy)) print("Post-synthesis Verilog-Based AVG ROC AUC: %f" % (post_synth_avg_roc_auc)) diff --git a/examples/jet_substructure/neq2lut_abc.py b/examples/jet_substructure/neq2lut_abc.py new file mode 100644 index 000000000..1d2bd7495 --- /dev/null +++ b/examples/jet_substructure/neq2lut_abc.py @@ -0,0 +1,171 @@ +# Copyright (C) 2021 Xilinx, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+from argparse import ArgumentParser
+
+import torch
+from torch.utils.data import DataLoader
+
+from logicnets.nn import generate_truth_tables, \
+                         lut_inference, \
+                         module_list_to_verilog_module, \
+                         load_histograms
+from logicnets.synthesis import synthesize_and_get_resource_counts_with_abc
+
+from train import configs, model_config, dataset_config, test
+from dataset import JetSubstructureDataset
+from models import JetSubstructureNeqModel, JetSubstructureLutModel
+from dataset_dump import dump_io
+
+other_options = {
+    "cuda": None,
+    "log_dir": None,
+    "checkpoint": None,
+    "histograms": None,
+    "freq_thresh": None,
+}
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Synthesize and convert a PyTorch trained model into verilog using ABC")
+    parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s",
+                        help="Specify the neural network model to use (default: %(default)s)")
+    parser.add_argument('--batch-size', type=int, default=None, metavar='N',
+                        help="Batch size for evaluation (default: %(default)s)")
+    parser.add_argument('--input-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-bitwidth', type=int, default=None,
+                        help="Bitwidth to use for activations in hidden layers (default: %(default)s)")
+    parser.add_argument('--output-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the output (default: %(default)s)")
+    parser.add_argument('--input-fanin', type=int, default=None,
+                        help="Fanin to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-fanin', type=int, default=None,
+                        help="Fanin to use for the hidden layers (default: %(default)s)")
+    parser.add_argument('--output-fanin', type=int, default=None,
+                        help="Fanin to use at the output (default: %(default)s)")
+    parser.add_argument('--hidden-layers', nargs='+', type=int, default=None,
+                        help="A list of hidden layer neuron sizes (default: %(default)s)")
+    parser.add_argument('--dataset-file', type=str, default='data/processed-pythia82-lhc13-all-pt1-50k-r1_h022_e0175_t220_nonu_truth.z',
+                        help="The file to use as the dataset input (default: %(default)s)")
+    parser.add_argument('--clock-period', type=float, default=1.0,
+                        help="Target clock period (ns) to use during Vivado synthesis (default: %(default)s)")
+    parser.add_argument('--dataset-config', type=str, default='config/yaml_IP_OP_config.yml',
+                        help="The file to use to configure the input dataset (default: %(default)s)")
+    parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'],
+                        help="Dataset to use for evaluation (default: %(default)s)")
+    parser.add_argument('--log-dir', type=str, default='./log',
+                        help="A location to store the log output of the training run and the output model (default: %(default)s)")
+    parser.add_argument('--checkpoint', type=str, required=True,
+                        help="The checkpoint file which contains the model weights")
+    parser.add_argument('--histograms', type=str, default=None,
+                        help="A checkpoint file containing histograms of LUT usage (default: %(default)s)")
+    parser.add_argument('--freq-thresh', type=int, default=None,
+                        help="Frequency threshold for including a truth table entry in the model (default: %(default)s)")
+    parser.add_argument('--num-registers', type=int, default=0,
+                        help="The number of registers to add to the generated verilog (default: %(default)s)")
+    args = parser.parse_args()
+    defaults = configs[args.arch]
+    options = vars(args)
+    del options['arch']
+    config = {}
+    for k in options.keys():
+        config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified.
+
+    if not os.path.exists(config['log_dir']):
+        os.makedirs(config['log_dir'])
+
+    # Split up configuration options to be more understandable
+    model_cfg = {}
+    for k in model_config.keys():
+        model_cfg[k] = config[k]
+    dataset_cfg = {}
+    for k in dataset_config.keys():
+        dataset_cfg[k] = config[k]
+    options_cfg = {}
+    for k in other_options.keys():
+        if k == 'cuda':
+            continue
+        options_cfg[k] = config[k]
+
+    # Fetch the test set
+    dataset = {}
+    dataset["train"] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split="train")
+    dataset["test"] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split="test")
+    train_loader = DataLoader(dataset["train"], batch_size=config['batch_size'], shuffle=False)
+    test_loader = DataLoader(dataset["test"], batch_size=config['batch_size'], shuffle=False)
+
+
+    # Instantiate the PyTorch model
+    x, y = dataset[args.dataset_split][0]
+    model_cfg['input_length'] = len(x)
+    model_cfg['output_length'] = len(y)
+    model = JetSubstructureNeqModel(model_cfg)
+
+    # Load the model weights
+    checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu')
+    model.load_state_dict(checkpoint['model_dict'])
+
+    # Test the PyTorch model
+    print("Running inference on baseline model...")
+    model.eval()
+    baseline_accuracy, baseline_avg_roc_auc = test(model, test_loader, cuda=False)
+    print("Baseline accuracy: %f" % (baseline_accuracy))
+    print("Baseline AVG ROC AUC: %f" % (baseline_avg_roc_auc))
+
+    # Run preprocessing on training set.
+    #train_input_file = config['log_dir'] + "/train_input.txt"
+    #train_output_file = config['log_dir'] + "/train_output.txt"
+    #test_input_file = config['log_dir'] + "/test_input.txt"
+    #test_output_file = config['log_dir'] + "/test_output.txt"
+    #print(f"Dumping train I/O to {train_input_file} and {train_output_file}")
+    #dump_io(model, train_loader, train_input_file, train_output_file)
+    #print(f"Dumping test I/O to {test_input_file} and {test_output_file}")
+    #dump_io(model, test_loader, test_input_file, test_output_file)
+
+    # Instantiate LUT-based model
+    lut_model = JetSubstructureLutModel(model_cfg)
+    lut_model.load_state_dict(checkpoint['model_dict'])
+
+    # Generate the truth tables in the LUT module
+    print("Converting NEQs to LUTs...")
+    generate_truth_tables(lut_model, verbose=True)
+
+    # Test the LUT-based model
+    print("Running inference on LUT-based model...")
+    lut_inference(lut_model)
+    lut_model.eval()
+    lut_accuracy, lut_avg_roc_auc = test(lut_model, test_loader, cuda=False)
+    print("LUT-Based Model accuracy: %f" % (lut_accuracy))
+    print("LUT-Based AVG ROC AUC: %f" % (lut_avg_roc_auc))
+    modelSave = { 'model_dict': lut_model.state_dict(),
+                  'test_accuracy': lut_accuracy,
+                  'test_avg_roc_auc': lut_avg_roc_auc}
+
+    torch.save(modelSave, options_cfg["log_dir"] + "/lut_based_model.pth")
+    if options_cfg["histograms"] is not None:
+        luts = torch.load(options_cfg["histograms"])
+        load_histograms(lut_model, luts)
+
+    print("Generating verilog in %s..." % (options_cfg["log_dir"]))
+    module_list_to_verilog_module(lut_model.module_list, "logicnet", options_cfg["log_dir"], generate_bench=True, add_registers=False)
+    print("Top level entity stored at: %s/logicnet.v ..." % (options_cfg["log_dir"]))
+
+    print("Running synthesis and generating technology-mapped verilog in ABC")
+    train_accuracy, test_accuracy, nodes, average_care_set_size = synthesize_and_get_resource_counts_with_abc(options_cfg["log_dir"], lut_model.module_list, pipeline_stages=args.num_registers, freq_thresh=args.freq_thresh, train_input_txt="train_input.txt", train_output_txt="train_output.txt", test_input_txt="test_input.txt", test_output_txt="test_output.txt", bdd_opt_cmd="&ttopt", verbose=False)
+    print(f"Training set accuracy (%): {train_accuracy}")
+    print(f"Test set accuracy (%): {test_accuracy}")
+    print(f"LUT6 (#): {nodes}")
+    print(f"Average care set size (%): {average_care_set_size}")
+
diff --git a/examples/jet_substructure/simulate_verilog.py b/examples/jet_substructure/simulate_verilog.py
new file mode 100644
index 000000000..0aecf6f44
--- /dev/null
+++ b/examples/jet_substructure/simulate_verilog.py
@@ -0,0 +1,114 @@
+# Copyright (C) 2021 Xilinx, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from argparse import ArgumentParser
+
+import torch
+from torch.utils.data import DataLoader
+
+from train import configs, model_config, dataset_config, test
+from dataset import JetSubstructureDataset
+from models import JetSubstructureNeqModel
+
+other_options = {
+    "checkpoint": None,
+    "input_verilog": None,
+    "num_registers": None,
+}
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Simulate a technology-mapped verilog implementation of a PyTorch trained model")
+    parser.add_argument('--arch', type=str, choices=configs.keys(), default="jsc-s",
+                        help="Specify the neural network model to use (default: %(default)s)")
+    parser.add_argument('--batch-size', type=int, default=None, metavar='N',
+                        help="Batch size for evaluation (default: %(default)s)")
+    parser.add_argument('--input-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-bitwidth', type=int, default=None,
+                        help="Bitwidth to use for activations in hidden layers (default: %(default)s)")
+    parser.add_argument('--output-bitwidth', type=int, default=None,
+                        help="Bitwidth to use at the output (default: %(default)s)")
+    parser.add_argument('--input-fanin', type=int, default=None,
+                        help="Fanin to use at the input (default: %(default)s)")
+    parser.add_argument('--hidden-fanin', type=int, default=None,
+                        help="Fanin to use for the hidden layers (default: %(default)s)")
+    parser.add_argument('--output-fanin', type=int, default=None,
+                        help="Fanin to use at the output (default: %(default)s)")
+    parser.add_argument('--hidden-layers', nargs='+', type=int, default=None,
+                        help="A list of hidden layer neuron sizes (default: %(default)s)")
+    parser.add_argument('--dataset-file', type=str, default='data/processed-pythia82-lhc13-all-pt1-50k-r1_h022_e0175_t220_nonu_truth.z',
+                        help="The file to use as the dataset input (default: %(default)s)")
+    parser.add_argument('--dataset-config', type=str, default='config/yaml_IP_OP_config.yml',
+                        help="The file to use 
to configure the input dataset (default: %(default)s)") + parser.add_argument('--dataset-split', type=str, default='test', choices=['train', 'test'], + help="Dataset to use for evaluation (default: %(default)s)") + parser.add_argument('--checkpoint', type=str, required=True, + help="The checkpoint file which contains the model weights") + parser.add_argument('--input-verilog', type=str, required=True, + help="The input verilog file to simulate") + parser.add_argument('--num-registers', type=int, default=0, + help="The number of pipeline registers in the verilog (default: %(default)s)") + args = parser.parse_args() + defaults = configs[args.arch] + options = vars(args) + del options['arch'] + config = {} + for k in options.keys(): + config[k] = options[k] if options[k] is not None else defaults[k] # Override defaults, if specified. + + # Split up configuration options to be more understandable + model_cfg = {} + for k in model_config.keys(): + model_cfg[k] = config[k] + dataset_cfg = {} + for k in dataset_config.keys(): + dataset_cfg[k] = config[k] + options_cfg = {} + for k in other_options.keys(): + if k == 'cuda': + continue + options_cfg[k] = config[k] + + # Fetch the test set + dataset = {} + dataset[args.dataset_split] = JetSubstructureDataset(dataset_cfg['dataset_file'], dataset_cfg['dataset_config'], split=args.dataset_split) + test_loader = DataLoader(dataset[args.dataset_split], batch_size=config['batch_size'], shuffle=False) + + # Instantiate the PyTorch model + x, y = dataset[args.dataset_split][0] + model_cfg['input_length'] = len(x) + model_cfg['output_length'] = len(y) + model = JetSubstructureNeqModel(model_cfg) + + # Load the model weights + checkpoint = torch.load(options_cfg['checkpoint'], map_location='cpu') + model.load_state_dict(checkpoint['model_dict']) + + # Test the PyTorch model + print("Running inference on baseline model...") + model.eval() + baseline_accuracy, baseline_avg_roc_auc = test(model, test_loader, cuda=False) + print("Baseline accuracy: %f" % (baseline_accuracy)) + print("Baseline AVG ROC AUC: %f" % (baseline_avg_roc_auc)) + + verilog_dir = os.path.dirname(options_cfg["input_verilog"]) + filename = os.path.split(options_cfg["input_verilog"])[-1] + print(f"Running inference simulation of Verilog-based model ({filename})") + model.verilog_inference(verilog_dir, filename, logfile=None, add_registers=options_cfg["num_registers"] != 0, verify=False) + model.latency = options_cfg["num_registers"] + verilog_accuracy, verilog_avg_roc_auc = test(model, test_loader, cuda=False) + print("Verilog-Based Model accuracy: %f" % (verilog_accuracy)) + print("Verilog-Based AVG ROC AUC: %f" % (verilog_avg_roc_auc)) + diff --git a/examples/jet_substructure/train.py b/examples/jet_substructure/train.py index 204068601..70d03ce93 100644 --- a/examples/jet_substructure/train.py +++ b/examples/jet_substructure/train.py @@ -46,6 +46,8 @@ "learning_rate": 1e-3, "seed": 2, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, "jsc-m": { "hidden_layers": [64, 32, 32, 32], @@ -61,6 +63,8 @@ "learning_rate": 1e-3, "seed": 3, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, "jsc-l": { "hidden_layers": [32, 64, 192, 192, 16], @@ -76,6 +80,8 @@ "learning_rate": 1e-3, "seed": 16, "checkpoint": None, + "histograms": None, + "freq_thresh": None, }, } diff --git a/examples/jet_substructure/verilog2bitfile.py b/examples/jet_substructure/verilog2bitfile.py new file mode 100644 index 000000000..3dfb7b226 --- /dev/null +++ 
b/examples/jet_substructure/verilog2bitfile.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2021 Xilinx, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from argparse import ArgumentParser
+
+from logicnets.synthesis import synthesize_and_get_resource_counts
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Run out-of-context Vivado synthesis on a generated verilog model")
+    parser.add_argument('--log-dir', type=str, default='./log', required=True,
+                        help="A location to store the log output of the training run and the output model (default: %(default)s)")
+    parser.add_argument('--clock-period', type=float, default=1.0,
+                        help="Target clock period (ns) to use during Vivado synthesis (default: %(default)s)")
+    args = parser.parse_args()
+    print("Running out-of-context synthesis")
+    ret = synthesize_and_get_resource_counts(args.log_dir, "logicnet", fpga_part="xcu280-fsvh2892-2L-e", clk_period_ns=args.clock_period, post_synthesis=1)
+
diff --git a/src/logicnets/abc.py b/src/logicnets/abc.py
new file mode 100644
index 000000000..07fd966bf
--- /dev/null
+++ b/src/logicnets/abc.py
@@ -0,0 +1,332 @@
+# Copyright (C) 2021 Xilinx, Inc
+# Copyright (C) 2021 Alan Mishchenko
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +import os +import subprocess +import re +import shutil + +_aig_re_str = r'and\s+=\s+\d+' +_lut_re_str = r'nd\s+=\s+\d+' +_acc_re_str = r'The\s+accuracy\s+is\s+\d+\.\d+' +_avg_cs_re_str = r'Average\s+care\s+set\s+is\s+\d+\.\d+' +_elapse_s_re_str = r'elapse:\s+\d+\.\d+' + +def verilog_bench_to_aig(verilog_file, aig_file, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"&lnetread {verilog_file}; &ps; &w {aig_file}"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + aig_re = re.compile(_aig_re_str) + nodes = int(aig_re.search(str(out)).group().split(" ")[-1]) + if verbose: + print(nodes) + print(out) + print(err) + return nodes, out, err # TODO: return the number of nodes + +def txt_to_sim(txt_file, sim_file, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"&lnetread {txt_file} {sim_file}"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + if verbose: + print(out) + print(err) + return out, err + +def simulate_circuit(circuit_file, sim_input_file, sim_output_file, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + if circuit_file.endswith(".aig"): + cmd = [f"{abc_path}/abc", '-c', f"&r {circuit_file}; &lnetsim {sim_input_file} {sim_output_file}"] + elif circuit_file.endswith(".blif"): + cmd = [f"{abc_path}/abc", '-c', f"read {circuit_file}; strash; &get; &lnetsim {sim_input_file} {sim_output_file}"] + else: + raise ValueError(f"Unsupported file type: {circuit_file}") + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + if verbose: + print(out) + print(err) + return out, err + +def putontop_aig(aig_files, output_aig_file, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"putontop {' '.join(aig_files)}; strash; print_stats; write {output_aig_file}"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + aig_re = re.compile(_aig_re_str) + nodes = int(aig_re.search(str(out)).group().split(" ")[-1]) + if verbose: + print(nodes) + print(out) + print(err) + return nodes, out, err # TODO: return the number of nodes + +def putontop_blif(blif_files, output_blif_file, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"putontop {' '.join(blif_files)}; sweep; print_stats; write {output_blif_file}"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + lut_re = re.compile(_lut_re_str) + nodes = int(lut_re.search(str(out)).group().split(" ")[-1]) + if verbose: + print(nodes) + print(out) + print(err) + return nodes, out, err # TODO: return the number of nodes + +def optimize_bdd_network(circuit_file, output_file, input_bitwidth, output_bitwidth, rarity, sim_file, opt_cmd="&lnetopt", abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"&r {circuit_file}; &ps; {opt_cmd} -I {input_bitwidth} -O {output_bitwidth} -R {rarity} {sim_file}; &w {output_file}; &ps; time"] + if verbose: + print(" ".join(cmd)) + proc = 
subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + aig_re = re.compile(_aig_re_str) + nodes = int(aig_re.search(str(out)).group().split(" ")[-1]) + if opt_cmd == "&lnetopt": + tt_pct_re = re.compile(_avg_cs_re_str) + tt_pct = float(tt_pct_re.search(str(out)).group().split(" ")[-1]) + else: + tt_pct = None + time_re = re.compile(_elapse_s_re_str) + time_s = float(time_re.search(str(out)).group().split(" ")[-1]) + if verbose: + print(nodes) + print(tt_pct) + print(time_s) + print(out) + print(err) + return nodes, tt_pct, time_s, out, err # TODO: return the number of nodes, tt%, time + +def optimize_mfs2(circuit_file, output_file, abc_path=os.environ["ABC_ROOT"], command="mfs2", mapping="if -K 6 -a;", working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"read {circuit_file}; {mapping} {command}; write_blif {output_file}; print_stats"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + lut_re = re.compile(_lut_re_str) + nodes = int(lut_re.search(str(out)).group().split(" ")[-1]) + if verbose: + print(nodes) + print(out) + print(err) + return nodes, out, err # TODO: return the number of nodes + +def iterative_mfs2_optimize(circuit_file, output_file, tmp_file="tmp.blif", max_loop=100, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + tmp_file_path = tmp_file if working_dir is None else f"{working_dir}/{tmp_file}" + output_file_path = output_file if working_dir is None else f"{working_dir}/{output_file}" + cmd = [f"{abc_path}/abc", '-c', f"read {circuit_file}; sweep; write_blif {tmp_file}; print_stats"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + lut_re = re.compile(_lut_re_str) + nodes = int(lut_re.search(str(out)).group().split(" ")[-1]) + best = nodes + shutil.copy(tmp_file_path, output_file_path) + if verbose: + print(nodes) + print(best) + print(out) + print(err) + for i in range(max_loop): + if i == 0: + cmd = [f"{abc_path}/abc", '-c', f"read {tmp_file}; mfs2; write_blif {tmp_file}; print_stats"] + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + if verbose: + print(" ".join(cmd)) + out, err = proc.communicate() + lut_re = re.compile(_lut_re_str) + nodes = int(lut_re.search(str(out)).group().split(" ")[-1]) + if verbose: + print(nodes) + print(out) + print(err) + else: + nodes, out, err = optimize_mfs2(tmp_file, tmp_file, abc_path=abc_path, working_dir=working_dir, verbose=verbose) + if nodes >= best: + break + else: + if verbose: + print(best) + best = nodes + shutil.copy(tmp_file_path, output_file_path) + os.remove(tmp_file_path) + return best + +def tech_map_circuit(circuit_file, output_blif, input_bitwidth, output_bitwidth, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"&r {circuit_file}; &lnetmap -I {input_bitwidth} -O {output_bitwidth}; write {output_blif}"] + if verbose: + print(" ".join(cmd)) + proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ) + out, err = proc.communicate() + if verbose: + print(out) + print(err) + return out, err + +def pipeline_tech_mapped_circuit(circuit_file, output_verilog, num_registers, abc_path=os.environ["ABC_ROOT"], working_dir=None, verbose=False): + cmd = [f"{abc_path}/abc", '-c', f"read {circuit_file}; 
+    if verbose:
+        print(" ".join(cmd))
+    proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ)
+    out, err = proc.communicate()
+    lut_re = re.compile(_lut_re_str)
+    nodes = int(lut_re.search(str(out)).group().split(" ")[-1])
+    if verbose:
+        print(nodes)
+        print(out)
+        print(err)
+    return nodes, out, err
+
+# Write a tech-mapped netlist out as Verilog without adding any pipelining.
+# Returns the LUT count of the netlist.
+def tech_map_to_verilog(circuit_file, output_verilog, abc_path=None, working_dir=None, verbose=False):
+    abc_path = _abc_root(abc_path)
+    cmd = [f"{abc_path}/abc", '-c', f"read {circuit_file}; print_stats; write_verilog -fm {output_verilog}"]
+    if verbose:
+        print(" ".join(cmd))
+    proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ)
+    out, err = proc.communicate()
+    lut_re = re.compile(_lut_re_str)
+    nodes = int(lut_re.search(str(out)).group().split(" ")[-1])
+    if verbose:
+        print(nodes)
+        print(out)
+        print(err)
+    return nodes, out, err
+
+# Compare simulated outputs against a reference pattern file with &lneteval.
+# Returns the accuracy reported by ABC.
+def evaluate_accuracy(circuit_file, sim_output_file, reference_txt, output_bitwidth, abc_path=None, working_dir=None, verbose=False):
+    abc_path = _abc_root(abc_path)
+    if circuit_file.endswith(".aig"):
+        cmd = [f"{abc_path}/abc", '-c', f"&r {circuit_file}; &lneteval -O {output_bitwidth} {sim_output_file} {reference_txt}"]
+    elif circuit_file.endswith(".blif"):
+        cmd = [f"{abc_path}/abc", '-c', f"read {circuit_file}; strash; &get; &lneteval -O {output_bitwidth} {sim_output_file} {reference_txt}"]
+    else:
+        raise ValueError(f"Unsupported file type: {circuit_file}")
+    if verbose:
+        print(" ".join(cmd))
+    proc = subprocess.Popen(cmd, cwd=working_dir, stdout=subprocess.PIPE, env=os.environ)
+    out, err = proc.communicate()
+    acc_re = re.compile(_acc_re_str)
+    accuracy = float(acc_re.search(str(out)).group().split(" ")[-1])
+    if verbose:
+        print(accuracy)
+        print(out)
+        print(err)
+    return accuracy, out, err
+
+def generate_prepare_script_string(num_layers, path):
+    prepare_script_template = """\
+# This script prepares experiments in ABC by deriving intermediate simulation patterns
+
+# Assuming that verilog/BENCH for each layer of the network are in files "ver/layer{{0,1,2,..}}.v"
+# and input/output patterns of the network are in files {{train,test}}_{{input,output}}.txt
+
+
+# ====================================================================================
+# Read the layers from Verilog/BENCH files
+{read_layers_string}
+
+# ====================================================================================
+# Convert input patterns into the internal binary representation
+&lnetread {path}/train_input.txt {path}/train.sim
+&lnetread {path}/test_input.txt {path}/test.sim
+
+
+# ====================================================================================
+# Generate training simulation info for the inputs of each layer
+{simulate_layers_string}
+
+# ====================================================================================
+# Combine all layers into one monolithic AIG for the whole network (layers.aig)
+{gen_monolithic_aig_string}
+"""
+    read_layer_template = "&lnetread {path}/ver/layer{i}.v; &ps; &w {path}/layer{i}.aig\n"
+    simulate_layer_template = "&r {path}/layer{i}.aig; &lnetsim {path}/train{it}.sim {path}/train{ip1}.sim\n"
+    gen_monolithic_aig_template = "putontop {layers_aig_string}; st; ps; write {path}/layers.aig\n"
+    read_layers_string = ""
+    simulate_layers_string = ""
+    layers_aig_string = ""
+    for i in range(num_layers):
+        read_layers_string += read_layer_template.format(i=i, path=path)
+        simulate_layers_string += simulate_layer_template.format(i=i, it="" if i == 0 else i, ip1=i+1, path=path)
+        layers_aig_string += "{path}/layer{i}.aig ".format(i=i, path=path)
+    gen_monolithic_aig_string = gen_monolithic_aig_template.format(layers_aig_string=layers_aig_string.strip(), path=path)
+    return prepare_script_template.format( path=path,
+                                           read_layers_string=read_layers_string,
+                                           simulate_layers_string=simulate_layers_string,
+                                           gen_monolithic_aig_string=gen_monolithic_aig_string)
+
+
+def generate_opt_script_string(module_list, path, num_registers, rarity=0, opt_cmd="&lnetopt"):
+    opt_script_template = """\
+# Generating script with rarity = {rarity}.
+
+# ---- rarity = {rarity} -------------------------------------------------------------------------------------------------------
+{optimise_with_rarity_string}
+
+{gen_monolithic_aig_string}
+
+{technology_map_layers_string}
+
+{gen_monolithic_blif_string}
+
+read {path}/blif/layers_opt.blif; ps; pipe -L {num_registers}; ps; retime -M 4; ps; sweep; ps; write_verilog -fm {path}/ver/layers_opt_p{num_registers}.v
+
+&r {path}/aig/layers_opt.aig; &lnetsim {path}/train.sim {path}/train.simo
+&r {path}/aig/layers_opt.aig; &lneteval -O 2 {path}/train.simo {path}/train_output.txt
+
+&r {path}/aig/layers_opt.aig; &lnetsim {path}/test.sim {path}/test.simo
+&r {path}/aig/layers_opt.aig; &lneteval -O 2 {path}/test.simo {path}/test_output.txt
+
+"""
+    optimise_with_rarity_template = "&r {path}/layer{i}.aig; &ps; {opt_cmd} -I {fanin_bits} -O {fanout_bits} -R {rarity} {path}/train{it}.sim; &w {path}/aig/layer{i}_opt.aig; &ps; time\n"
+    technology_map_layer_template = "&r {path}/aig/layer{i}_opt.aig; &lnetmap -I {fanin_bits} -O {fanout_bits}; write {path}/blif/layer{i}_opt.blif; write_verilog -fm {path}/ver/layer{i}_opt.v\n"
+    gen_monolithic_aig_template = "putontop {aig_layers_string}; st; ps; write {path}/aig/layers_opt.aig\n"
+    gen_monolithic_blif_template = "putontop {blif_layers_string}; sw; ps; write {path}/blif/layers_opt.blif\n"
+    num_layers = len(module_list) # TODO: fetch number of layers from the model
+    optimise_with_rarity_string = ""
+    technology_map_layers_string = ""
+    aig_layers_string = ""
+    blif_layers_string = ""
+    for i in range(num_layers):
+        # Read in fanin/fanout bits
+        # TODO: Assert that the fanin/fanout bits are the same for all neurons in the layer
+        layer = module_list[i]
+        _, input_bitwidth = layer.input_quant.get_scale_factor_bits()
+        _, output_bitwidth = layer.output_quant.get_scale_factor_bits()
+        indices, _, _, _ = layer.neuron_truth_tables[0]
+        num_indices = len(indices) # Fan-in of the first neuron, assumed uniform across the layer
+        fanin_bits = input_bitwidth*num_indices
+        fanout_bits = output_bitwidth
+
+        # Generate optimisation script.
+        optimise_with_rarity_string += optimise_with_rarity_template.format(fanin_bits=fanin_bits, fanout_bits=fanout_bits, it="" if i == 0 else i, i=i, path=path, rarity=rarity, opt_cmd=opt_cmd)
+        technology_map_layers_string += technology_map_layer_template.format(fanin_bits=fanin_bits, fanout_bits=fanout_bits, i=i, path=path)
+        aig_layers_string += "{path}/aig/layer{i}_opt.aig ".format(i=i, path=path)
+        blif_layers_string += "{path}/blif/layer{i}_opt.blif ".format(i=i, path=path)
+    gen_monolithic_aig_string = gen_monolithic_aig_template.format(aig_layers_string=aig_layers_string.strip(), path=path)
+    gen_monolithic_blif_string = gen_monolithic_blif_template.format(blif_layers_string=blif_layers_string.strip(), path=path)
+    return opt_script_template.format( rarity=rarity,
+                                       num_registers=num_registers,
+                                       path=path,
+                                       optimise_with_rarity_string=optimise_with_rarity_string,
+                                       gen_monolithic_aig_string=gen_monolithic_aig_string,
+                                       technology_map_layers_string=technology_map_layers_string,
+                                       gen_monolithic_blif_string=gen_monolithic_blif_string)
+
diff --git a/src/logicnets/nn.py b/src/logicnets/nn.py
index 4d0076439..60d835c5f 100644
--- a/src/logicnets/nn.py
+++ b/src/logicnets/nn.py
@@ -47,10 +47,40 @@ def generate_truth_tables(model: nn.Module, verbose: bool = False) -> None:
     model.training = training
 
 # TODO: Create a container module which performs this function.
-def lut_inference(model: nn.Module) -> None:
+def lut_inference(model: nn.Module, track_used_luts: bool = False) -> None:
     for name, module in model.named_modules():
         if type(module) == SparseLinearNeq:
-            module.lut_inference()
+            module.lut_inference(track_used_luts=track_used_luts)
+
+# TODO: Create a container module which performs this function.
+def save_luts(model: nn.Module, path: str) -> None:
+    lut_dict = {}
+    for name, module in model.named_modules():
+        if type(module) == SparseLinearNeq:
+            luts = module.neuron_truth_tables
+            indices = list(map(lambda x: x[0], luts))
+            tt_inputs = list(map(lambda x: x[1], luts))
+            tt_input_bin_str = list(map(lambda x: list(map(lambda y: list(map(lambda z: module.input_quant.get_bin_str(z), y)), x)), tt_inputs))
+            tt_float_outputs = list(map(lambda x: x[2], luts))
+            tt_bin_outputs = list(map(lambda x: x[3], luts))
+            tt_outputs_bin_str = list(map(lambda x: list(map(lambda y: module.output_quant.get_bin_str(y), x)), tt_bin_outputs))
+            histogram = module.used_luts_histogram
+            lut_dict[name] = {
+                'indices': indices,
+                'input_state_space': tt_inputs,
+                'input_state_space_bin_str': tt_input_bin_str,
+                'output_state_space_float': tt_float_outputs,
+                'output_state_space_bin': tt_bin_outputs,
+                'output_state_space_bin_str': tt_outputs_bin_str,
+                'histogram': histogram,
+            }
+    torch.save(lut_dict, path)
+
+# TODO: Create a container module which performs this function.
+def load_histograms(model: nn.Module, lut_dict: dict) -> None:
+    for name, module in model.named_modules():
+        if name in lut_dict.keys():
+            module.used_luts_histogram = lut_dict[name]['histogram']
 
 # TODO: Create a container module which performs this function.
 def neq_inference(model: nn.Module) -> None:
@@ -60,7 +90,7 @@
 
 # TODO: Should this go in with the other verilog functions?
 # TODO: Support non-linear topologies
-def module_list_to_verilog_module(module_list: nn.ModuleList, module_name: str, output_directory: str, add_registers: bool = True, generate_bench: bool = True):
+def module_list_to_verilog_module(module_list: nn.ModuleList, module_name: str, output_directory: str, add_registers: bool = True, generate_bench: bool = True, freq_thresh=None):
     input_bitwidth = None
     output_bitwidth = None
     module_contents = ""
@@ -68,7 +98,7 @@
         m = module_list[i]
         if type(m) == SparseLinearNeq:
             module_prefix = f"layer{i}"
-            module_input_bits, module_output_bits = m.gen_layer_verilog(module_prefix, output_directory, generate_bench=generate_bench)
+            module_input_bits, module_output_bits = m.gen_layer_verilog(module_prefix, output_directory, freq_thresh=freq_thresh, generate_bench=generate_bench)
             if i == 0:
                 input_bitwidth = module_input_bits
             if i == len(module_list)-1:
@@ -115,11 +145,13 @@ def __init__(self, in_features: int, out_features: int, input_quant, output_quan
         self.neuron_truth_tables = None
         self.apply_input_quant = apply_input_quant
         self.apply_output_quant = apply_output_quant
+        self.track_used_luts = False
+        self.used_luts_histogram = None
 
     # TODO: Move the verilog string templates to elsewhere
     # TODO: Move this to another class
     # TODO: Update this code to support custom bitwidths per input/output
-    def gen_layer_verilog(self, module_prefix, directory, generate_bench: bool = True):
+    def gen_layer_verilog(self, module_prefix, directory, freq_thresh=None, generate_bench: bool = True):
         _, input_bitwidth = self.input_quant.get_scale_factor_bits()
         _, output_bitwidth = self.output_quant.get_scale_factor_bits()
         input_bitwidth, output_bitwidth = int(input_bitwidth), int(output_bitwidth)
@@ -130,7 +162,7 @@ def gen_layer_verilog(self, module_prefix, directory, generate_bench: bool = Tru
         for index in range(self.out_features):
             module_name = f"{module_prefix}_N{index}"
             indices, _, _, _ = self.neuron_truth_tables[index]
-            neuron_verilog = self.gen_neuron_verilog(index, module_name) # Generate the contents of the neuron verilog
+            neuron_verilog = self.gen_neuron_verilog(index, module_name, freq_thresh=freq_thresh) # Generate the contents of the neuron verilog
             with open(f"{directory}/{module_name}.v", "w") as f:
                 f.write(neuron_verilog)
             if generate_bench:
@@ -150,7 +182,7 @@
 
     # TODO: Move the verilog string templates to elsewhere
     # TODO: Move this to another class
-    def gen_neuron_verilog(self, index, module_name):
+    def gen_neuron_verilog(self, index, module_name, freq_thresh=None):
         indices, input_perm_matrix, float_output_states, bin_output_states = self.neuron_truth_tables[index]
         _, input_bitwidth = self.input_quant.get_scale_factor_bits()
         _, output_bitwidth = self.output_quant.get_scale_factor_bits()
@@ -163,7 +195,11 @@ def gen_neuron_verilog(self, index, module_name):
                 val = input_perm_matrix[i,idx]
                 entry_str += self.input_quant.get_bin_str(val)
             res_str = self.output_quant.get_bin_str(bin_output_states[i])
-            lut_string += f"\t\t\t{int(cat_input_bitwidth)}'b{entry_str}: M1r = {int(output_bitwidth)}'b{res_str};\n"
+            if (freq_thresh is None) or (self.used_luts_histogram[index][i] >= freq_thresh):
+                lut_string += f"\t\t\t{int(cat_input_bitwidth)}'b{entry_str}: M1r = {int(output_bitwidth)}'b{res_str};\n"
+        # Add a default "don't care" statement to cover any pruned entries
+        default_string = int(output_bitwidth) * 'x'
+        lut_string += f"\t\t\tdefault: M1r = {int(output_bitwidth)}'b{default_string};\n"
         return generate_lut_verilog(module_name, int(cat_input_bitwidth), int(output_bitwidth), lut_string)
 
     # TODO: Move the string templates to bench.py
@@ -187,8 +223,9 @@ def gen_neuron_bench(self, index, module_name):
         lut_string += generate_lut_input_string(int(cat_input_bitwidth))
         return generate_lut_bench(int(cat_input_bitwidth), int(output_bitwidth), lut_string)
 
-    def lut_inference(self):
+    def lut_inference(self, track_used_luts=False):
         self.is_lut_inference = True
+        self.track_used_luts = track_used_luts
         self.input_quant.bin_output()
         self.output_quant.bin_output()
 
@@ -198,7 +235,7 @@ def neq_inference(self):
         self.output_quant.float_output()
 
     # TODO: This function might be a useful utility outside of this class..
-    def table_lookup(self, connected_input: Tensor, input_perm_matrix: Tensor, bin_output_states: Tensor) -> Tensor:
+    def table_lookup(self, connected_input: Tensor, input_perm_matrix: Tensor, bin_output_states: Tensor, neuron_lut_histogram=None) -> Tensor:
         fan_in_size = connected_input.shape[1]
         ci_bcast = connected_input.unsqueeze(2) # Reshape to B x Fan-in x 1
         pm_bcast = input_perm_matrix.t().unsqueeze(0) # Reshape to 1 x Fan-in x InputStates
@@ -207,17 +244,29 @@
         if not (matches == torch.ones_like(matches,dtype=matches.dtype)).all():
             raise Exception(f"One or more vectors in the input is not in the possible input state space")
         indices = torch.argmax(eq.type(torch.int64),dim=1)
+        if self.track_used_luts:
+            # TODO: vectorize this loop
+            for i in indices:
+                neuron_lut_histogram[i] += 1
         return bin_output_states[indices]
 
     def lut_forward(self, x: Tensor) -> Tensor:
         if self.apply_input_quant:
             x = self.input_quant(x) # Use this to fetch the bin output of the input, if the input isn't already in binary format
+        # TODO: Put this in a child class(?)
+        # TODO: Add support for non-uniform fan-in
+        if self.track_used_luts:
+            if self.used_luts_histogram is None:
+                self.used_luts_histogram = self.out_features * [None]
+                for i in range(self.out_features):
+                    self.used_luts_histogram[i] = torch.zeros(size=(len(self.neuron_truth_tables[i][2]),), dtype=torch.int64)
         y = torch.zeros((x.shape[0],self.out_features))
         # Perform table lookup for each neuron output
         for i in range(self.out_features):
            indices, input_perm_matrix, float_output_states, bin_output_states = self.neuron_truth_tables[i]
+            neuron_lut_histogram = self.used_luts_histogram[i] if self.track_used_luts else None
            connected_input = x[:,indices]
-            y[:,i] = self.table_lookup(connected_input, input_perm_matrix, bin_output_states)
+            y[:,i] = self.table_lookup(connected_input, input_perm_matrix, bin_output_states, neuron_lut_histogram=neuron_lut_histogram)
         return y
 
     def forward(self, x: Tensor) -> Tensor:
diff --git a/src/logicnets/synthesis.py b/src/logicnets/synthesis.py
index 8193a6369..4856640c4 100644
--- a/src/logicnets/synthesis.py
+++ b/src/logicnets/synthesis.py
@@ -14,10 +14,27 @@
 
 import os
 import subprocess
+import shutil
 from shutil import which
+import glob
+
+from .abc import verilog_bench_to_aig,\
+                 txt_to_sim,\
+                 simulate_circuit,\
+                 putontop_aig,\
+                 putontop_blif,\
+                 optimize_bdd_network,\
+                 evaluate_accuracy,\
+                 tech_map_circuit,\
+                 iterative_mfs2_optimize,\
+                 pipeline_tech_mapped_circuit,\
+                 tech_map_to_verilog
+from .verilog import generate_abc_verilog_wrapper,\
+                     fix_abc_module_name
 
 #xcvu9p-flgb2104-2-i
 # TODO: Add option to perform synthesis on a remote server
+# Synthesise design with vivado and get resource counts
 def synthesize_and_get_resource_counts(verilog_dir, top_name, fpga_part = "xcku3p-ffva676-1-e", clk_name="clk", clk_period_ns=5.0, post_synthesis = 0):
     # old part : "xczu3eg-sbva484-1-i"
     # ensure that the OH_MY_XILINX envvar is set
@@ -55,3 +72,104 @@
     else:
         ret["fmax_mhz"] = 1000.0 / (clk_period_ns - ret["WNS"])
     return ret
+
+# Optimize the design with ABC
+def synthesize_and_get_resource_counts_with_abc(verilog_dir, module_list, pipeline_stages=0, freq_thresh=0, train_input_txt="train_input.txt", train_output_txt="train_output.txt", test_input_txt="test_input.txt", test_output_txt="test_output.txt", bdd_opt_cmd="&lnetopt", verbose=False):
+    # Fail early with a clear error; the .abc helpers resolve ABC_ROOT at call time.
+    if "ABC_ROOT" not in os.environ:
+        raise Exception("The environment variable ABC_ROOT is not defined.")
+
+    # Create the directory structure ready for processing with ABC
+    project_prefix = "abc"
+    abc_project_root = f"{verilog_dir}/{project_prefix}"
+    verilog_bench_dir = f"{abc_project_root}/ver"
+    aig_dir = f"{abc_project_root}/aig"
+    blif_dir = f"{abc_project_root}/blif"
+    veropt_dir = f"{abc_project_root}/veropt"
+    if not os.path.exists(verilog_bench_dir):
+        os.makedirs(verilog_bench_dir)
+    if not os.path.exists(aig_dir):
+        os.makedirs(aig_dir)
+    if not os.path.exists(blif_dir):
+        os.makedirs(blif_dir)
+    if not os.path.exists(veropt_dir):
+        os.makedirs(veropt_dir)
+    # Fetch the right source files from the verilog directory
+    source_files = glob.glob(f"{verilog_dir}/logicnet.v") + [f"{verilog_dir}/layer{i}.v" for i in range(len(module_list))] + glob.glob(f"{verilog_dir}/*.bench")
+    for f in source_files:
+        shutil.copy(f, verilog_bench_dir)
+    # Fetch the I/O files
+    for f in list(map(lambda x: f"{verilog_dir}/{x}", [train_input_txt, train_output_txt, test_input_txt, test_output_txt])):
+        shutil.copy(f, f"{abc_project_root}")
f"{abc_project_root}") + + # Preparation - model / I/O conversion + # Convert txt inputs into the sim format + out, err = txt_to_sim(train_input_txt, "train.sim", working_dir=abc_project_root, verbose=verbose) + out, err = txt_to_sim(test_input_txt, "test.sim", working_dir=abc_project_root) + + # Create AIGs from verilog + for i in range(len(module_list)): + nodes, out, err = verilog_bench_to_aig(f"ver/layer{i}.v", f"aig/layer{i}.aig", working_dir=abc_project_root, verbose=verbose) + + # Simulate each layer + for i in range(len(module_list)): + out, err = simulate_circuit(f"aig/layer{i}.aig", f"train{i}.sim" if i != 0 else "train.sim", f"train{i+1}.sim", working_dir=abc_project_root, verbose=verbose) + + # Synthesis + average_tt_pcts = [] + for i in range(len(module_list)): + _, input_bitwidth = module_list[i].input_quant.get_scale_factor_bits() + _, output_bitwidth = module_list[i].output_quant.get_scale_factor_bits() + indices, _, _, _ = module_list[i].neuron_truth_tables[0] + fanin = len(indices) + nodes, tt_pct, time, out, err = optimize_bdd_network(f"aig/layer{i}.aig", f"aig/layer{i}_full.aig", int(input_bitwidth*fanin), int(output_bitwidth), freq_thresh, f"train{i}.sim" if i != 0 else "train.sim", opt_cmd=bdd_opt_cmd, working_dir=abc_project_root, verbose=verbose) + average_tt_pcts.append(tt_pct) + + # Technology mapping + for i in range(len(module_list)): + _, input_bitwidth = module_list[i].input_quant.get_scale_factor_bits() + _, output_bitwidth = module_list[i].output_quant.get_scale_factor_bits() + indices, _, _, _ = module_list[i].neuron_truth_tables[0] + fanin = len(indices) + out, err = tech_map_circuit(f"aig/layer{i}_full.aig", f"blif/layer{i}_full.blif", int(input_bitwidth*fanin), int(output_bitwidth), working_dir=abc_project_root, verbose=verbose) + + # Generate monolithic circuits + if len(module_list) > 1: + nodes, out, err = putontop_aig([f"aig/layer{i}_full.aig" for i in range(len(module_list))], f"aig/layers_full.aig", working_dir=abc_project_root, verbose=verbose) + nodes, out, err = putontop_blif([f"blif/layer{i}_full.blif" for i in range(len(module_list))], f"blif/layers_full.blif", working_dir=abc_project_root, verbose=verbose) + else: + shutil.copy(f"{aig_dir}/layer0_full.aig", f"{aig_dir}/layers_full.aig") + shutil.copy(f"{blif_dir}/layer0_full.blif", f"{blif_dir}/layers_full.blif") + + # Generic logic synthesis optimizations + nodes = iterative_mfs2_optimize(circuit_file=f"blif/layers_full.blif", output_file=f"blif/layers_full_opt.blif", tmp_file="blif/tmp.blif", max_loop=100, working_dir=abc_project_root, verbose=verbose) + + # Generate verilog, with or without pipelining + if pipeline_stages == 0: + nodes, out, err = tech_map_to_verilog(circuit_file=f"blif/layers_full_opt.blif", output_verilog=f"veropt/layers_full_opt.v", working_dir=abc_project_root, verbose=verbose) + else: + nodes, out, err = pipeline_tech_mapped_circuit(circuit_file=f"blif/layers_full_opt.blif", output_verilog=f"veropt/layers_full_opt.v", num_registers=num_registers, working_dir=abc_project_root, verbose=verbose) + fix_abc_module_name(f"{veropt_dir}/layers_full_opt.v", f"{veropt_dir}/layers_full_opt.v", "\\aig", "layers_full_opt", add_timescale=True) + + # Generate top-level entity wrapper + _, input_bitwidth = module_list[0].input_quant.get_scale_factor_bits() + _, output_bitwidth = module_list[-1].output_quant.get_scale_factor_bits() + input_bitwidth, output_bitwidth = int(input_bitwidth), int(output_bitwidth) + total_input_bits = module_list[0].in_features*input_bitwidth + 
+    total_output_bits = module_list[-1].out_features*output_bitwidth
+    module_name = "logicnet"
+    veropt_wrapper_str = generate_abc_verilog_wrapper(module_name=module_name, input_name="M0", input_bits=total_input_bits, output_name=f"M{len(module_list)}", output_bits=total_output_bits, submodule_name="layers_full_opt", num_registers=pipeline_stages)
+    with open(f"{veropt_dir}/{module_name}.v", "w") as f:
+        f.write(veropt_wrapper_str)
+
+    # Evaluation
+    # Training set:
+    _, output_bitwidth = module_list[-1].output_quant.get_scale_factor_bits()
+    out, err = simulate_circuit(f"blif/layers_full_opt.blif", "train.sim", "train.simo", working_dir=abc_project_root, verbose=verbose)
+    train_accuracy, out, err = evaluate_accuracy(f"blif/layers_full_opt.blif", "train.simo", train_output_txt, int(output_bitwidth), working_dir=abc_project_root, verbose=verbose)
+    # Test set:
+    out, err = simulate_circuit(f"blif/layers_full_opt.blif", "test.sim", "test.simo", working_dir=abc_project_root, verbose=verbose)
+    test_accuracy, out, err = evaluate_accuracy(f"blif/layers_full_opt.blif", "test.simo", test_output_txt, int(output_bitwidth), working_dir=abc_project_root, verbose=verbose)
+
+    return train_accuracy, test_accuracy, nodes, average_tt_pcts
+
diff --git a/src/logicnets/verilog.py b/src/logicnets/verilog.py
index f073a4692..2911979e1 100644
--- a/src/logicnets/verilog.py
+++ b/src/logicnets/verilog.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import numpy as np
+
 def generate_register_verilog(module_name="myreg", param_name="DataWidth", input_name="data_in", output_name="data_out"):
     register_template = """\
 module {module_name} #(parameter {param_name}=16) (
@@ -91,3 +93,51 @@
         connection_string += ", "
     return connection_string
 
+# Rename the module emitted by ABC's write_verilog (e.g. "\aig") to a predictable name.
+# Note: this assumes ABC places only the module name (plus the clock, for sequential netlists) on the first line of the port list.
+def fix_abc_module_name(input_verilog_file, output_verilog_file, old_module_name, new_module_name, add_timescale: bool = False):
+    with open(input_verilog_file, 'r') as f:
+        lines = f.readlines()
+    with open(output_verilog_file, 'w') as f:
+        if add_timescale:
+            f.write("`timescale 1 ps / 1 ps\n")
+        for l in lines:
+            if f"module {old_module_name}" in l:
+                if add_timescale:
+                    l = f"module {new_module_name} (clock,\n"
+                else:
+                    l = f"module {new_module_name} (\n"
+            f.write(l)
+
+# Generate a top-level wrapper that maps ABC's bit-blasted pi*/po* ports onto the
+# bus-style interface used by the rest of the LogicNets flow.
+def generate_abc_verilog_wrapper(module_name: str, input_name: str, input_bits: int, output_name: str, output_bits: int, submodule_name: str, num_registers: int, add_timescale: bool = True):
+    abc_wrapper_template = """\
+{timescale}
+module {module_name} (input [{input_bits_1:d}:0] {input_name}, input clk, input rst, output[{output_bits_1:d}:0] {output_name});
+{module_contents}
+endmodule\n"""
+    # Assumes ABC zero-pads the pi/po indices to the width of the largest index.
+    input_digits = int(np.ceil(np.log10(input_bits)))
+    output_digits = int(np.ceil(np.log10(output_bits)))
+    module_contents = []
+    module_contents.append(f"{submodule_name} {submodule_name}_inst (")
+    # Connect the clock (only present when the submodule is pipelined) and the input bits
+    if num_registers > 0:
+        module_contents.append(f"    .clock(clk),")
+    for i in range(input_bits):
+        module_contents.append(f"    .pi{i:0{input_digits}d}({input_name}[{i}]),")
+    for i in range(output_bits):
+        if i < output_bits-1:
+            module_contents.append(f"    .po{i:0{output_digits}d}({output_name}[{i}]),")
+        else:
+            module_contents.append(f"    .po{i:0{output_digits}d}({output_name}[{i}])")
+    module_contents.append(f"    );\n")
+    module_contents = "\n".join(module_contents)
+    return abc_wrapper_template.format( module_name=module_name,
+                                        input_name=input_name,
+                                        input_bits_1=input_bits-1,
+                                        output_name=output_name,
+                                        output_bits_1=output_bits-1,
+                                        module_contents=module_contents,
+                                        timescale="`timescale 1 ps / 1 ps" if add_timescale else "")
+
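
For reference, a minimal sketch (not part of the patch) of how the LUT-usage tracking added in nn.py might be driven end to end. Here `model` and `train_loader` are placeholders, and the model is assumed to expose its layers as `model.module_list`, as the LogicNets example models do:

import torch
from logicnets.nn import generate_truth_tables, lut_inference, save_luts, \
    module_list_to_verilog_module

generate_truth_tables(model, verbose=False)  # Enumerate every neuron's truth table
lut_inference(model, track_used_luts=True)   # Switch to LUT-based inference, counting entry hits
model.eval()
with torch.no_grad():
    for x, y in train_loader:                # Populates used_luts_histogram for each neuron
        model(x)
save_luts(model, "log/luts.pth")             # Tables + histograms; reloadable with torch.load and load_histograms
# Entries seen fewer than freq_thresh times become don't-cares in the emitted Verilog
module_list_to_verilog_module(model.module_list, "logicnet", "./verilog", generate_bench=True, freq_thresh=10)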
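And a sketch, under the same assumptions, of invoking the new ABC flow itself. The verilog directory must already contain logicnet.v, the per-layer layer*.v and *.bench files, and the four I/O pattern .txt files, and ABC_ROOT must point at the patched ABC build from the Dockerfile:

from logicnets.synthesis import synthesize_and_get_resource_counts_with_abc

train_acc, test_acc, num_luts, avg_care_sets = synthesize_and_get_resource_counts_with_abc(
    "./verilog",                # verilog_dir, as written by module_list_to_verilog_module
    model.module_list,          # The SparseLinearNeq layers, in order
    pipeline_stages=0,          # 0 emits a purely combinational netlist
    freq_thresh=0,              # Forwarded to &lnetopt as the rarity (-R) parameter
    verbose=False)
print(train_acc, test_acc, num_luts, avg_care_sets)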