From 56c57e5ef16f958cc66595002957fd6e4e1e7c14 Mon Sep 17 00:00:00 2001
From: Thierry Moreau <moreau@cs.washington.edu>
Date: Sun, 5 Aug 2018 17:09:55 -0700
Subject: [PATCH] [AUTOMATION] Infrastructure to use hardware and schedules
 from vta-experiments (#7)

---
 vta/config/pynq_sample.json         | 21 +++++++++++----------
 vta/hardware/xilinx/sim/vta_test.cc |  6 +++---
 vta/python/vta/bitstream.py         |  1 -
 vta/python/vta/environment.py       |  2 +-
 vta/python/vta/top/vta_conv2d.py    | 21 +++++++++++++++++++--
 vta/src/sim/sim_driver.cc           |  2 +-
 6 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/vta/config/pynq_sample.json b/vta/config/pynq_sample.json
index fd6190caa9ed8..5daa27e9a3d82 100644
--- a/vta/config/pynq_sample.json
+++ b/vta/config/pynq_sample.json
@@ -1,20 +1,21 @@
 {
   "TARGET" : "pynq",
-  "HW_VER" : "0.0.1",
+  "HW_VER" : "0.0.2",
   "HW_FREQ" : 100,
-  "HW_CLK_TARGET" : 8,
-  "ALU" : true,
-  "GEMM_II" : 2,
-  "TALU_II" : 4,
+  "HW_CLK_TARGET" : 7,
+  "ALU_EN" : true,
+  "MUL_EN" : true,
+  "GEMM_II" : 1,
+  "TALU_II" : 2,
   "LOG_INP_WIDTH" : 3,
-  "LOG_WGT_WIDTH" : 1,
+  "LOG_WGT_WIDTH" : 3,
   "LOG_ACC_WIDTH" : 5,
   "LOG_OUT_WIDTH" : 3,
   "LOG_BATCH" : 0,
-  "LOG_BLOCK_IN" : 5,
-  "LOG_BLOCK_OUT" : 5,
+  "LOG_BLOCK_IN" : 4,
+  "LOG_BLOCK_OUT" : 4,
   "LOG_UOP_BUFF_SIZE" : 15,
-  "LOG_INP_BUFF_SIZE" : 17,
-  "LOG_WGT_BUFF_SIZE" : 17,
+  "LOG_INP_BUFF_SIZE" : 15,
+  "LOG_WGT_BUFF_SIZE" : 18,
   "LOG_ACC_BUFF_SIZE" : 17
 }
diff --git a/vta/hardware/xilinx/sim/vta_test.cc b/vta/hardware/xilinx/sim/vta_test.cc
index 266eeaae2f9b8..514c15e94fab8 100644
--- a/vta/hardware/xilinx/sim/vta_test.cc
+++ b/vta/hardware/xilinx/sim/vta_test.cc
@@ -60,13 +60,13 @@ int main(void) {
 #endif // ALU_EN
 
     // Run blocked GEMM test
-    status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, true, 2);
+    // status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, true, 2);
     status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, false, 2);
-    status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, true, 1);
+    // status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, true, 1);
     status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, false, 1);
 
     // Simple GEMM unit test
-    status |= gemm_test(4 * VTA_BATCH, 4 * VTA_BLOCK_OUT, 4 * VTA_BLOCK_IN, true);
+    status |= gemm_test(4 * VTA_BATCH, 4 * VTA_BLOCK_OUT, 4 * VTA_BLOCK_IN, false);
 
     return status;
 }
diff --git a/vta/python/vta/bitstream.py b/vta/python/vta/bitstream.py
index 73ce062cb62c9..3f20d818575ba 100644
--- a/vta/python/vta/bitstream.py
+++ b/vta/python/vta/bitstream.py
@@ -28,7 +28,6 @@ def get_bitstream_path():
 
     # Derive destination path
     cache_dir = os.getenv("VTA_CACHE_PATH", os.path.join(os.getenv("HOME"), ".vta_cache/"))
-    cache_dir = os.path.join(cache_dir, env.TARGET)
     # Create the directory if it didn't exist
     if not os.path.exists(cache_dir):
         os.makedirs(cache_dir)
diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py
index c270585860bd2..96898e5b3977c 100644
--- a/vta/python/vta/environment.py
+++ b/vta/python/vta/environment.py
@@ -150,7 +150,7 @@ def __init__(self, cfg):
         self._dev_ctx = None
         self._last_env = None
         #  derive bitstream name
-        self.BITSTREAM = "{}_{}x{}x{}_a{}w{}o{}_{}_{}_{}_{}_{}MHz_{}ns_gii{}".format(
+        self.BITSTREAM = "{}/bitstreams_valid/{}x{}x{}_a{}w{}o{}_{}_{}_{}_{}_{}MHz_{}ns_gii{}".format(
             self.HW_VER.replace('.', '_'),
             self.BATCH,
             self.BLOCK_IN,
diff --git a/vta/python/vta/top/vta_conv2d.py b/vta/python/vta/top/vta_conv2d.py
index b0029565f5066..8439d7bb52e7e 100644
--- a/vta/python/vta/top/vta_conv2d.py
+++ b/vta/python/vta/top/vta_conv2d.py
@@ -6,6 +6,7 @@
 import logging
 import tvm
 import topi
+import re
 
 from nnvm.top import registry as reg, OpPattern
 from nnvm.top import nn as _nn
@@ -340,7 +341,7 @@ def _get_workload(data, pad_data, kernel, output):
     w_str = (i_w + w_pad*2 - k_w) // (o_w - 1)
     return Workload(i_b, i_h, i_w, i_c, o_c, k_h, k_w, h_pad, w_pad, h_str, w_str)
 
-def schedule_packed_conv2d(outs, plan=None, skip_load_inp=False, skip_load_wgt=False,
+def schedule_packed_conv2d(outs, planStr=None, skip_load_inp=False, skip_load_wgt=False,
                            skip_load_acc=False, skip_store_out=False, skip_alu=False,
                            skip_gemm=False):
     """ Schedule the packed conv2d.
@@ -377,7 +378,23 @@ def _traverse(op):
     else:
         pad_data = None
     wrkld = _get_workload(data, pad_data, kernel, output)
-    if plan is None:
+    if planStr:
+      matchObj = re.match( r'b(\d+)oc(\d+)ic(\d+)h(\d+)w(\d+)oc_t(\d+)h_t(\d+)', sched_str)
+      b_factor = int(matchObj.group(1))
+      oc_factor = int(matchObj.group(2))
+      ic_factor = int(matchObj.group(3))
+      h_factor = int(matchObj.group(4))
+      w_factor = int(matchObj.group(5))
+      oc_nthread = int(matchObj.group(6))
+      h_nthread = int(matchObj.group(7))
+      plan = Schedule(b_factor=b_factor,
+                      oc_factor=oc_factor,
+                      ic_factor=ic_factor,
+                      h_factor=h_factor,
+                      w_factor=w_factor,
+                      oc_nthread=oc_nthread,
+                      h_nthread=h_nthread)
+    else:
         plan = find_schedules(wrkld, vt_only=True, best_only=True)[0]
         logging.info("Trying to find plan for %s", wrkld)
     env = get_env()
diff --git a/vta/src/sim/sim_driver.cc b/vta/src/sim/sim_driver.cc
index ef1f62842f01a..b9cbd22c8adbd 100644
--- a/vta/src/sim/sim_driver.cc
+++ b/vta/src/sim/sim_driver.cc
@@ -526,7 +526,7 @@ class Device {
 
   template<bool use_imm, typename F>
   void RunALULoop(const VTAAluInsn* op, F func) {
-    prof_->alu_counter += op->iter_out * op->iter_in * op->uop_end - op->uop_bgn;
+    prof_->alu_counter += op->iter_out * op->iter_in * (op->uop_end - op->uop_bgn);
     if (prof_->SkipExec()) return;
     for (int y = 0; y < op->iter_out; ++y) {
       for (int x = 0; x < op->iter_in; ++x) {