-
Notifications
You must be signed in to change notification settings - Fork 50
/
SYN.py
executable file
·5264 lines (4801 loc) · 225 KB
/
SYN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
import copy
import datetime
import glob
import hashlib
import math
import os
import pickle
import sys
from distutils.dir_util import copy_tree
from multiprocessing.pool import ThreadPool
from pathlib import Path
from timeit import default_timer as timer
import CC_TOOLS
import C_TO_LOGIC
import DEVICE_MODELS
import DIAMOND
import EFINITY
import GOWIN
import OPEN_TOOLS
import PYRTL
import QUARTUS
import RAW_VHDL
import SW_LIB
import VHDL
import VIVADO
from utilities import REPO_ABS_DIR
START_TIME = timer()  # Wall-clock reference point for reporting total tool runtime
OUTPUT_DIR_NAME = "pipelinec_output"
SYN_OUTPUT_DIRECTORY = None  # Auto created with pid and filename or from user
TOP_LEVEL_MODULE = None  # Holds the name of the top level module
SYN_TOOL = None  # Attempts to figure out from part number
CONVERT_FINAL_TOP_VERILOG = False  # Flag for final top level conversion to verilog
DO_SYN_FAIL_SIM = False  # Start simulation if synthesis fails
WRITE_AXIS_XO_FILE = False  # Flag to write an AXIS .xo kernel file — presumably Xilinx packaging; confirm at use site
# Welcome to the land of magic numbers
# "But I think its much worse than you feared" Modest Mouse - I'm Still Here
# Artix 7 100t full RT:
# Coarse sweep BEST IS sliced 494 clocks, get 490 clock pipeline at 153MHz
# Coarse (not sweep) gets to 504 slice, 500 clk, 10 clks off fine for now...
# With BEST_GUESS_MUL_MAX = 10.0, COARSE_SWEEP_MULT_MAX = 1.5, not skipping higher fmax to compensate
# RT pixel_logic
# HIER_SWEEP_MULT_MIN latency # top runs
# 0.125 470 32
# 0.25 470 24
# 0.5 419 7
# 0.75 421 13
# 1.0 423 31
# 1.0625 413 31
# 1.125 370 5
# 1.25 370 5
# 1.5 370 5
# 1.75 370 5
# 1.875 371 4
# 1.9375 350 4
# 1.96875 350 4
# 1.984375 585 9
# 2.0 585 9
# 2.25 493 8
# 2.5 509 7
# === After Recent updates
# 2.0? 536 5
# 1.9375 343 8
# 1.5 368 8
# 1.087 368
# 1.0 417 15
# 0.5 ~417 ~45 reaches 1.0x start after ~33runs
# 0.0 ~417 ~65 reaches 1.0x after ~55 runs
# ECP5 Full RT:
# Coarse sweep gets to: 75 clocks
# Coarse (not sweep) gets to 81 clocks
# HIER_SWEEP_MULT_MIN latency # top runs
# 2.0 80+clks never finished ~24hrs hash=5c32
# 1.9375 ^same
# 1.5 83+clks never finished ~12 hours hash=a39f
# 1.0 63+clks never finished ~12 hrs hash=e748
# 0.5 45+clks never finished ~12 hr hash=22ef
# 0.25 60+clks never finished 12+hours hash=5237
# 0.181 79+clks never finished 12+hours hash=222f
# 0.125 ^same
# 0.09375 ^same
# 0.078125 \/same
# 0.06780026720106881
# 0.0625 66 4 ^ ends up at slighty higher after failing coarse
# 0.046875 ^same
# 0.03125 \/same
# 0.0 70 2 # wow
# Tool increases HIER_SWEEP_MULT_MIN if it has trouble
# i.e. tries to pipeline smaller/less logic modules, easier task
# Starting off too large is bad - no way to recover
# Target pipelining at minimum at modules of period = target/MULT
# 0.0 -> start w/ largest/top level modules first
# 0.5 -> 2 times the target period (not meeting timing),
# 2.0 -> 1/2 the target period (easily meets timing)
HIER_SWEEP_MULT_MIN = None  # Cmd line arg sets
HIER_SWEEP_MULT_INC = 0.001  # Intentionally very small, sweep already tries to make largest possible steps
MAX_N_WORSE_RESULTS_MULT = 16  # Multiplier for how many times failing to improve before moving on? divided by total latency
BEST_GUESS_MUL_MAX = 5.0  # Multiplier limit on top down register insertion coarsely during middle out sweep
MAX_ALLOWED_LATENCY_MULT = 5.0  # Multiplier limit for individual module coarse register insertion, similar/same as BEST_GUESS_MUL_MAX?
COARSE_SWEEP_MULT_INC = 0.1  # Multiplier increment for how many times fmax to try for compensating not meeting timing
COARSE_SWEEP_MULT_MAX = 1.0  # ==1.0 disables trying to pipeline to fake higher fmax, Max multiplier for internal fmax
MAX_CLK_INC_RATIO = 1.25  # Multiplier for how many extra clocks can be added ex. 1.25 means 25% more stages max
DELAY_UNIT_MULT = 10.0  # Timing is reported in nanoseconds. Multiplier to convert that time into integer units (nanosecs, tenths, hundredths of nanosecs)
INF_MHZ = 1000  # Impossible timing goal
INF_HIER_MULT = 999999.9  # Needed?
SLICE_EPSILON_MULTIPLIER = 5  # 6.684491979 max/best? # Constant used to determine when slices are equal. Higher=finer grain slicing, lower=similar slices are said to be equal
SLICE_STEPS_BETWEEN_REGS = 3  # Multiplier for how narrow to start off the search for better timing. Higher=More narrow start, Experimentally 2 isn't enough, slices shift <0 , > 1 easily....what?
def PART_SET_TOOL(part_str, allow_fail=False):
    """Guess and set the global SYN_TOOL module from an FPGA part string prefix.

    Does nothing if SYN_TOOL is already chosen. With no part string, falls
    back to PYRTL-based estimates when available, otherwise exits. For a
    known part prefix, checks that the matching tool install exists and
    either reports its path or (unless allow_fail) raises.
    """
    global SYN_TOOL
    if SYN_TOOL is not None:
        return  # Already decided elsewhere
    # Try to guess synthesis tool based on part number - hacky for now...
    if part_str is None:
        if allow_fail:
            return
        # Try to default to part-less estimates from pyrtl?
        if PYRTL.IS_INSTALLED():
            SYN_TOOL = PYRTL
            print("Defaulting to pyrtl based timing estimates...")
        else:
            print(
                "Need to set FPGA part somewhere in the code to continue with synthesis tool support!"
            )
            print('Ex. #pragma PART "LFE5U-85F-6BG381C"')
            sys.exit(0)
    else:
        part_lower = part_str.lower()
        part_upper = part_str.upper()

        def check_install(tool_path, label, err_msg):
            # Report where the tool lives, or fail loudly unless failures allowed
            if os.path.exists(tool_path):
                print(label, tool_path, flush=True)
            elif not allow_fail:
                raise Exception(err_msg)

        if part_lower.startswith("xc"):
            SYN_TOOL = VIVADO
            check_install(VIVADO.VIVADO_PATH, "Vivado:", "Vivado install not found!")
        elif part_lower.startswith(("ep", "10c", "5c")):
            SYN_TOOL = QUARTUS
            check_install(QUARTUS.QUARTUS_PATH, "Quartus:", "Quartus install not found!")
        elif part_lower.startswith(("lfe5u", "ice")):
            # Diamond fails to create proj for UMG5G part?
            if "um5g" in part_lower:
                SYN_TOOL = OPEN_TOOLS
            elif "ice40" not in part_lower:
                # Default to open tools for non ice40 (nextpnr not support ooc mode yet)
                SYN_TOOL = OPEN_TOOLS
            else:
                SYN_TOOL = DIAMOND
                check_install(
                    DIAMOND.DIAMOND_PATH, "Diamond:", "Diamond install not found!"
                )
        elif part_upper.startswith(("T8", "TI")):
            SYN_TOOL = EFINITY
            check_install(EFINITY.EFINITY_PATH, "Efinity:", "Efinity install not found!")
        elif part_upper.startswith("GW"):
            SYN_TOOL = GOWIN
            check_install(GOWIN.GOWIN_PATH, "Gowin:", "Gowin install not found!")
        elif part_upper.startswith("CCGM"):
            SYN_TOOL = CC_TOOLS
            # TODO dont base on cc-toolchain directory?
            check_install(
                CC_TOOLS.CC_TOOLS_PATH,
                "CologneChip Tools:",
                "CologneChip toolchain install not found!",
            )
        else:
            if not allow_fail:
                print("No known synthesis tool for FPGA part:", part_str, flush=True)
                sys.exit(-1)
    if SYN_TOOL is not None:
        print("Using", SYN_TOOL.__name__, "synthesizing for part:", part_str)
def TOOL_DOES_PNR():
    """Return True if the current SYN_TOOL flow reports post place-and-route timing.

    VIVADO/GOWIN depend on their DO_PNR setting; QUARTUS, OPEN_TOOLS,
    EFINITY and CC_TOOLS always run PnR; DIAMOND and PYRTL give
    synthesis-only estimates. Raises for unrecognized tools.
    """
    # Configurable flows: PnR only when set to run "all" stages
    if SYN_TOOL is VIVADO:
        return VIVADO.DO_PNR == "all"
    if SYN_TOOL is GOWIN:
        return GOWIN.DO_PNR == "all"
    # Flows that always place and route
    if SYN_TOOL in (QUARTUS, OPEN_TOOLS, EFINITY, CC_TOOLS):
        return True
    # Flows that only give synthesis estimates
    if SYN_TOOL in (DIAMOND, PYRTL):
        return False
    raise Exception("Need to know if tool flow does PnR!")
def GET_CLK_TO_MHZ_AND_CONSTRAINTS_PATH(
    parser_state, inst_name=None, allow_no_syn_tool=False
):
    """Return ({clock_name: mhz}, constraints_filepath) for the current SYN_TOOL.

    A specific instance gets a single "clk" at INF_MHZ and a per-instance
    "clock<ext>" file; the multi-main top gets one clock per main function
    and a shared "clocks<ext>" file.
    """
    # Constraints file extension is tool specific
    ext = None
    if SYN_TOOL is VIVADO:
        ext = ".xdc"
    elif SYN_TOOL is DIAMOND and DIAMOND.DIAMOND_TOOL == "lse":
        ext = ".ldc"
    elif SYN_TOOL is DIAMOND and DIAMOND.DIAMOND_TOOL == "synplify":
        ext = ".sdc"
    elif SYN_TOOL is OPEN_TOOLS:
        ext = ".py"
    elif SYN_TOOL is CC_TOOLS:
        ext = ".ccf"  # Only for temp clock pins, no timing contraints?
    elif SYN_TOOL in (QUARTUS, EFINITY, GOWIN, PYRTL):
        ext = ".sdc"
    else:
        if not allow_no_syn_tool:
            # Sufjan Stevens - Video Game
            raise Exception(
                f"Add constraints file ext for syn tool {SYN_TOOL.__name__}"
            )
        ext = ""
    clock_name_to_mhz = {}
    if inst_name:
        # Default instances get max fmax
        clock_name_to_mhz["clk"] = INF_MHZ
        # (disabled) could instead use the fixed freq when inst is a main:
        #   if inst_name in parser_state.main_mhz:
        #       clock_name_to_mhz["clk"] = GET_TARGET_MHZ(inst_name, parser_state, allow_no_syn_tool)
        Logic = parser_state.LogicInstLookupTable[inst_name]
        out_filepath = GET_OUTPUT_DIRECTORY(Logic) + "/" + "clock" + ext
    else:
        out_filepath = SYN_OUTPUT_DIRECTORY + "/" + "clocks" + ext
        for main_func in parser_state.main_mhz:
            clock_mhz = GET_TARGET_MHZ(main_func, parser_state, allow_no_syn_tool)
            clk_name = "clk_" + VHDL.CLK_EXT_STR(main_func, parser_state)
            clock_name_to_mhz[clk_name] = clock_mhz
    return clock_name_to_mhz, out_filepath
# return path
def WRITE_CLK_CONSTRAINTS_FILE(parser_state, inst_name=None):
    """Write the tool-specific clock constraints file and return its path.

    OPEN_TOOLS gets nextpnr python constraints, CC_TOOLS a placeholder
    .ccf, everything else standard sdc-style create_clock lines plus
    async clock-group commands when multiple clocks exist. Clocks with no
    associated frequency fall back to INF_MHZ with a warning.
    """
    # Use specified mhz is multimain top
    clock_name_to_mhz, out_filepath = GET_CLK_TO_MHZ_AND_CONSTRAINTS_PATH(
        parser_state, inst_name
    )
    # FIX: use a context manager so the file is closed even on exceptions
    # (was a bare open()/close() pair)
    with open(out_filepath, "w") as f:
        if SYN_TOOL is OPEN_TOOLS:
            # All clock assumed async in nextpnr constraints
            for clock_name in clock_name_to_mhz:
                clock_mhz = clock_name_to_mhz[clock_name]
                if clock_mhz is None:
                    print(
                        f"WARNING: No frequency associated with clock {clock_name}. Missing MAIN_MHZ pragma? Setting to maximum rate = {INF_MHZ}MHz so timing report can be generated..."
                    )
                    clock_mhz = INF_MHZ
                f.write('ctx.addClock("' + clock_name + '", ' + str(clock_mhz) + ")\n")
        elif SYN_TOOL is CC_TOOLS:
            f.write("#TODO")
        else:
            # Collect all user generated clocks, no groups for now
            all_user_clks = set()
            for clk_mhz in parser_state.clk_mhz.values():
                clk_name = "clk_" + VHDL.CLK_MHZ_GROUP_TEXT(clk_mhz, None)
                all_user_clks.add(clk_name)
            # Standard sdc like constraints
            for clock_name in clock_name_to_mhz:
                clock_mhz = clock_name_to_mhz[clock_name]
                if clock_mhz is None:
                    print(
                        f"WARNING: No frequency associated with clock {clock_name}. Missing MAIN_MHZ pragma? Setting to maximum rate = {INF_MHZ}MHz so timing report can be generated..."
                    )
                    clock_mhz = INF_MHZ
                ns = 1000.0 / clock_mhz
                # Quartus has some maximum acceptable clock period < 8333333 ns
                if SYN_TOOL is QUARTUS:
                    MAX_NS = 80000
                    if ns > MAX_NS:
                        print("Clipping clock", clock_name, "period to", MAX_NS, "ns...")
                        ns = MAX_NS
                # Default cmd is get_ports unless need internal user clk net name
                get_thing_cmd = "get_ports"
                if clock_name in all_user_clks:
                    get_thing_cmd = "get_nets"
                f.write(
                    f"create_clock -add -name {clock_name} -period {ns} -waveform {{0 {ns/2.0}}} [{get_thing_cmd} {{{clock_name}}}]\n"
                )
            # All clock assumed async? Doesnt matter for internal syn
            # Rely on generated/board provided constraints for real hardware
            if len(clock_name_to_mhz) > 1:
                if SYN_TOOL is VIVADO:
                    f.write(
                        "set_clock_groups -name async_clks_group -asynchronous -group [get_clocks *] -group [get_clocks *]\n"
                    )
                elif SYN_TOOL is QUARTUS:
                    # Ignored set_clock_groups at clocks.sdc(3): The clock clk_100p0 was found in more than one -group argument.
                    # Uh do the hard way?
                    clk_sets = set()
                    for clock_name1 in clock_name_to_mhz:
                        for clock_name2 in clock_name_to_mhz:
                            if clock_name1 != clock_name2:
                                clk_set = frozenset([clock_name1, clock_name2])
                                if clk_set not in clk_sets:
                                    # BUGFIX: terminate each sdc command with a newline;
                                    # previously 3+ clocks produced commands run
                                    # together on a single invalid line
                                    f.write(
                                        "set_clock_groups -asynchronous -group [get_clocks "
                                        + clock_name1
                                        + "] -group [get_clocks "
                                        + clock_name2
                                        + "]\n"
                                    )
                                    clk_sets.add(clk_set)
                elif SYN_TOOL is DIAMOND:
                    # f.write("set_clock_groups -name async_clks_group -asynchronous -group [get_clocks *] -group [get_clocks *]")
                    # ^ is wrong, makes 200mhx system clock?
                    pass  # rely on clock cross path detection error in timing report
                else:
                    raise Exception(
                        f"How does tool {SYN_TOOL.__name__} deal with async clocks?"
                    )
    return out_filepath
class MultiMainTimingParams:
    """Parameters describing how multiple main pipelines are timed together."""

    def __init__(self):
        # Per-instance pipeline params
        self.TimingParamsLookupTable = {}
        # TODO some kind of params for clock crossing

    def GET_HASH_EXT(self, parser_state):
        """Return a short "_xxxx" md5 suffix covering every main's timing params."""
        # Just hash all the slices #TODO fix to just mains
        parts = []
        for main_func in sorted(parser_state.main_mhz.keys()):
            timing_params = self.TimingParamsLookupTable[main_func]
            parts.append(
                timing_params.GET_HASH_EXT(self.TimingParamsLookupTable, parser_state)
            )
        digest = hashlib.md5("".join(parts).encode("utf-8")).hexdigest()
        return "_" + digest[0:4]  # 4 chars enough?
# These are the parameters that describe how a pipeline should be formed
class TimingParams:
    """Pipelining parameters (slice points + IO regs) for one logic instance.

    Derived values (total latency, hash ext) are cached on the instance and
    must be invalidated via INVALIDATE_CACHE whenever params change.
    """

    def __init__(self, inst_name, logic):
        self.logic = logic
        self.inst_name = inst_name
        # Have the current params (slices) been fixed,
        # Default to fixed if known cant be sliced
        self.params_are_fixed = False
        # Params, private _ since cached
        self._slices = []  # Unless raw vhdl (no submodules), these are only ~approximate slices
        # ??Maybe add flag for these fixed slices provide latency, dont rebuild? unecessary?
        self._has_input_regs = False
        self._has_output_regs = False
        # Sometimes slices are between submodules,
        # This can specify where a stage is artificially started by not allowing
        # submodules to be instantiated even if driven in an early state
        # UNUSED FOR NOW
        # self.submodule_to_start_stage = {}
        # self.submodule_to_end_stage = {}
        # Cached stuff
        self.calcd_total_latency = None
        self.hash_ext = None
        # self.timing_report_stage_range = None

    def DEEPCOPY(self):
        """Copy with an independent slices list; logic obj intentionally shared."""
        rv = copy.copy(self)
        rv._slices = self._slices[:]  # COPY
        # Logic ok to be same obj
        # All others immut right now
        return rv

    def INVALIDATE_CACHE(self):
        """Drop cached latency and hash values after a param change."""
        self.calcd_total_latency = None
        self.hash_ext = None
        # self.timing_report_stage_range = None

    # I was dumb and used get latency all over
    # mAKE CACHED VERSION
    def GET_TOTAL_LATENCY(self, parser_state, TimingParamsLookupTable=None):
        """Cached total latency in clocks (includes IO regs and fixed latency)."""
        if self.calcd_total_latency is None:
            self.calcd_total_latency = self.CALC_TOTAL_LATENCY(
                parser_state, TimingParamsLookupTable
            )
        return self.calcd_total_latency

    def GET_PIPELINE_LOGIC_ADDED_LATENCY(self, parser_state, TimingParamsLookupTable):
        """Latency added purely by pipelining: total minus user fixed latency and IO regs."""
        total_latency = self.GET_TOTAL_LATENCY(parser_state, TimingParamsLookupTable)
        # Remove latency added by user
        if self.logic.func_name in parser_state.func_fixed_latency:
            fixed_latency = parser_state.func_fixed_latency[self.logic.func_name]
            if total_latency < fixed_latency:
                raise Exception(
                    f"{total_latency} latency function {self.logic.func_name} has fixed latency less? {fixed_latency}?"
                )
            total_latency = total_latency - fixed_latency
        pipeline_added_latency = total_latency
        if self._has_input_regs:
            if pipeline_added_latency == 0:
                raise Exception(
                    f"Zero latency function {self.logic.func_name} has input regs?"
                )
            pipeline_added_latency -= 1
        if self._has_output_regs:
            if pipeline_added_latency == 0:
                # BUGFIX: this message was missing its f prefix and raised the
                # literal "{self.logic.func_name}" text instead of the name
                raise Exception(
                    f"Zero latency function {self.logic.func_name} has output regs?"
                )
            pipeline_added_latency -= 1
        return pipeline_added_latency

    # Haha why uppercase everywhere ...
    def CALC_TOTAL_LATENCY(self, parser_state, TimingParamsLookupTable=None):
        """Compute total latency from fixed latency / raw slices / pipeline map, plus IO regs."""
        # Use hard coded pipelined latency
        if self.logic.func_name in parser_state.func_fixed_latency:
            fixed_latency = parser_state.func_fixed_latency[self.logic.func_name]
            pipeline_latency = fixed_latency
        # C built in has multiple shared latencies based on where used
        elif len(self.logic.submodule_instances) <= 0:
            # Just pipeline slices
            pipeline_latency = len(self._slices)
        else:
            # If cant be sliced then latency must be zero right?
            if not self.logic.CAN_BE_SLICED(parser_state):
                if self._has_input_regs or self._has_output_regs:
                    raise Exception(
                        f"{self.logic.func_name} cannot have IO regs but has been given them!?"
                    )
                # print("Bad io regs on non sliceable!")
                # sys.exit(-1)
                return 0
            if TimingParamsLookupTable is None:
                print(
                    "Need TimingParamsLookupTable for non raw hdl latency",
                    self.logic.func_name,
                )
                print(0 / 0)
                sys.exit(-1)
            pipeline_map = GET_PIPELINE_MAP(
                self.inst_name, self.logic, parser_state, TimingParamsLookupTable
            )
            pipeline_latency = pipeline_map.num_stages - 1
        # Adjut latency for io regs
        latency = pipeline_latency
        if self._has_input_regs:
            latency += 1
        if self._has_output_regs:
            latency += 1
        return latency

    def RECURSIVE_GET_IO_REGS_AND_NO_SUBMODULE_SLICES(
        self, inst_name, Logic, TimingParamsLookupTable, parser_state
    ):
        """Build a nested tuple of (in_regs, out_regs, ...) for hashing.

        Only lowest-level raw VHDL modules (no submodules) contribute their
        slice lists; higher modules contribute recursively per submodule.
        """
        # All modules include IO reg flags
        timing_params = TimingParamsLookupTable[inst_name]
        rv = (
            timing_params._has_input_regs,
            timing_params._has_output_regs,
        )
        # Only lowest level raw VHDL modules with no submodules include slices
        if len(Logic.submodule_instances) > 0:
            # Not raw hdl, slices dont guarentee describe pipeline structure
            for submodule in sorted(
                Logic.submodule_instances
            ):  # MUST BE SORTED FOR CONSISTENT ORDER!
                sub_inst = inst_name + C_TO_LOGIC.SUBMODULE_MARKER + submodule
                if sub_inst not in parser_state.LogicInstLookupTable:
                    print("Missing inst_name:", sub_inst)
                    print("has instances:")
                    for inst_i, logic_i in parser_state.LogicInstLookupTable.items():
                        print(inst_i)
                    print(0 / 0, flush=True)
                    sys.exit(-1)
                sub_logic = parser_state.LogicInstLookupTable[sub_inst]
                rv += (
                    self.RECURSIVE_GET_IO_REGS_AND_NO_SUBMODULE_SLICES(
                        sub_inst, sub_logic, TimingParamsLookupTable, parser_state
                    ),
                )
        else:
            # Raw HDL
            rv += (tuple(timing_params._slices),)
        return rv

    # Hash ext only reflect raw hdl slices (better would be raw hdl bits per stage)
    def BUILD_HASH_EXT(self, inst_name, Logic, TimingParamsLookupTable, parser_state):
        """Return an 8-char md5 "_xxxxxxxx" suffix for the IO-regs+slices tuple."""
        # print("BUILD_HASH_EXT",Logic.func_name, flush=True)
        io_regs_and_slices_tup = self.RECURSIVE_GET_IO_REGS_AND_NO_SUBMODULE_SLICES(
            inst_name, Logic, TimingParamsLookupTable, parser_state
        )
        s = str(io_regs_and_slices_tup)
        full_hash = hashlib.md5(s.encode("utf-8")).hexdigest()
        hash_ext = (
            "_" + (full_hash[0:8])
        )  # 4 chars enough, no you dummy, lets hope 8 is
        # print(f"inst {inst_name} {full_hash} {hash_ext}")
        return hash_ext

    def GET_HASH_EXT(self, TimingParamsLookupTable, parser_state):
        """Cached wrapper around BUILD_HASH_EXT."""
        if self.hash_ext is None:
            self.hash_ext = self.BUILD_HASH_EXT(
                self.inst_name, self.logic, TimingParamsLookupTable, parser_state
            )
        return self.hash_ext

    def ADD_SLICE(self, slice_point):
        """Insert a new slice point in [0.0, 1.0], keeping _slices sorted.

        Exits the process on out-of-range points and raises on duplicates.
        """
        if self._slices is None:
            self._slices = []
        if slice_point > 1.0:
            print("Slice > 1.0?", slice_point)
            sys.exit(-1)
            slice_point = 1.0  # unreachable after exit (kept as-is)
        if slice_point < 0.0:
            print("Slice < 0.0?", slice_point)
            print(0 / 0)
            sys.exit(-1)
            slice_point = 0.0  # unreachable after exit (kept as-is)
        if slice_point not in self._slices:
            self._slices.append(slice_point)
            self._slices = sorted(self._slices)
            self.INVALIDATE_CACHE()
        else:
            raise Exception(
                f"Slice {slice_point} exists already cant add? slices pre add: {self._slices}"
            )
        if self.calcd_total_latency is not None:
            print("WTF adding a slice and has latency cache?", self.calcd_total_latency)
            print(0 / 0)
            sys.exit(-1)

    def SET_SLICES(self, value):
        """Replace the slice list (copied); invalidates caches on change."""
        if value != self._slices:
            self._slices = value[:]
            self.INVALIDATE_CACHE()

    def SET_HAS_IN_REGS(self, value):
        """Set the input-registers flag; invalidates caches on change."""
        if value != self._has_input_regs:
            self._has_input_regs = value
            self.INVALIDATE_CACHE()

    def SET_HAS_OUT_REGS(self, value):
        """Set the output-registers flag; invalidates caches on change."""
        if value != self._has_output_regs:
            self._has_output_regs = value
            self.INVALIDATE_CACHE()

    def GET_SUBMODULE_LATENCY(
        self, submodule_inst_name, parser_state, TimingParamsLookupTable
    ):
        """Total latency of one submodule instance from the lookup table."""
        submodule_timing_params = TimingParamsLookupTable[submodule_inst_name]
        return submodule_timing_params.GET_TOTAL_LATENCY(
            parser_state, TimingParamsLookupTable
        )
def DEL_ALL_CACHES():
    # Clear all caches after parsing is done
    # NOTE(review): despite the name, only the zero-clock hash-ext cache is
    # reset here; clearing of the timing-params (and pipeline-map) caches is
    # deliberately(?) commented out below — confirm whether they should also
    # be cleared.
    global _GET_ZERO_CLK_HASH_EXT_LOOKUP_cache
    # global _GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP_cache
    _GET_ZERO_CLK_HASH_EXT_LOOKUP_cache = {}
    # _GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP_cache = {}
    # _GET_ZERO_ADDED_CLKS_PIPELINE_MAP_cache = {}


# Module-level caches:
#  func_name -> zero-clock hash ext string
_GET_ZERO_CLK_HASH_EXT_LOOKUP_cache = {}
#  sorted-inst-names key -> zero-added-clocks TimingParams lookup table
_GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP_cache = {}
def GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP(parser_state):
    """Build (or fetch from cache) a TimingParams table with zero added clocks
    for every logic instance.

    Also sanity checks that functions which cannot be sliced are not
    implicitly describing multi-stage pipelines, and warms the per-function
    zero-clock hash-ext cache. Raises if any such bad function is found.
    """
    # Cached?
    # print("GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP")
    cache_key = str(sorted(set(parser_state.LogicInstLookupTable.keys())))
    if cache_key in _GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP_cache:
        cached_lookup = _GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP_cache[cache_key]
        # Hand back per-instance copies so callers can mutate freely
        rv = {}
        for inst_i, params_i in cached_lookup.items():
            rv[inst_i] = params_i.DEEPCOPY()
        return rv
    # Create empty lookup
    ZeroAddedClocksTimingParamsLookupTable = {}
    for logic_inst_name in parser_state.LogicInstLookupTable:
        logic_i = parser_state.LogicInstLookupTable[logic_inst_name]
        timing_params_i = TimingParams(logic_inst_name, logic_i)
        ZeroAddedClocksTimingParamsLookupTable[logic_inst_name] = timing_params_i
    # Calc cached params so they are in cache
    bad_fixed_latency_when_cant_slice_func_names = set()
    for logic_inst_name in parser_state.LogicInstLookupTable:
        logic_i = parser_state.LogicInstLookupTable[logic_inst_name]
        timing_params_i = ZeroAddedClocksTimingParamsLookupTable[logic_inst_name]
        total_latency = timing_params_i.GET_TOTAL_LATENCY(
            parser_state, ZeroAddedClocksTimingParamsLookupTable
        )
        # Sanity check functions that can't be sliced arent showing up with slices/latency in them
        pipeline_added_latency = timing_params_i.GET_PIPELINE_LOGIC_ADDED_LATENCY(
            parser_state, ZeroAddedClocksTimingParamsLookupTable
        )
        # Report each offending func_name only once
        if logic_i.func_name not in bad_fixed_latency_when_cant_slice_func_names:
            if (pipeline_added_latency > 0) and not logic_i.BODY_CAN_BE_SLICED(
                parser_state
            ):
                print(
                    "Error: Zero latency static stateful function",
                    logic_i.func_name,
                    "actually describes a pipeline of non-zero latency/depth. (",
                    pipeline_added_latency + 1,
                    "stages total)",
                )
                bad_fixed_latency_when_cant_slice_func_names.add(logic_i.func_name)
        # Write cache
        if logic_i.func_name in _GET_ZERO_CLK_HASH_EXT_LOOKUP_cache:
            timing_params_i.hash_ext = _GET_ZERO_CLK_HASH_EXT_LOOKUP_cache[
                logic_i.func_name
            ]
        else:
            _GET_ZERO_CLK_HASH_EXT_LOOKUP_cache[logic_i.func_name] = (
                timing_params_i.GET_HASH_EXT(
                    ZeroAddedClocksTimingParamsLookupTable, parser_state
                )
            )
    if len(bad_fixed_latency_when_cant_slice_func_names) > 0:
        print(
            """Modify one or more of the above functions:
Remove FUNC_LATENCY pragmas specifying fixed pipeline latencies.
OR
Remove stateful static local variables to allow pipelining."""
        )
        raise Exception("Resolve the above unexpected added pipeline latency errors.")
    _GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP_cache[cache_key] = (
        ZeroAddedClocksTimingParamsLookupTable
    )
    return ZeroAddedClocksTimingParamsLookupTable
_GET_ZERO_ADDED_CLKS_PIPELINE_MAP_cache = {}


def GET_ZERO_ADDED_CLKS_PIPELINE_MAP(inst_name, Logic, parser_state, write_files=True):
    """Return the zero-added-clocks PipelineMap for a function, cached by func_name.

    Requires every submodule to have a known delay; caches the result only
    when the map produced a zero_clk_max_delay.
    """
    key = Logic.func_name
    # Try cache
    # BUGFIX: was try/except with a bare except, which also swallowed the
    # SystemExit raised by the sanity-check sys.exit below and silently fell
    # through to recompute; use an explicit membership test instead.
    if key in _GET_ZERO_ADDED_CLKS_PIPELINE_MAP_cache:
        rv = _GET_ZERO_ADDED_CLKS_PIPELINE_MAP_cache[key]
        # Sanity?
        if rv.logic != Logic:
            print("Zero clock cache no mactho")
            sys.exit(-1)
        return rv
    # Only need to check submodules, not self
    has_delay = True
    for sub_inst in Logic.submodule_instances:
        func_name = Logic.submodule_instances[sub_inst]
        sub_func_logic = parser_state.FuncLogicLookupTable[func_name]
        if sub_func_logic.delay is None:
            print(Logic.func_name, "/", sub_func_logic.func_name)
            has_delay = False
            break
    if not has_delay:
        raise Exception("Can't get zero clock pipeline map without delay?")
    # Populate table as all 0 added clks
    ZeroAddedClocksLogicInst2TimingParams = GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP(
        parser_state
    )
    # Get pipeline map for this logic
    zero_added_clks_pipeline_map = GET_PIPELINE_MAP(
        inst_name, Logic, parser_state, ZeroAddedClocksLogicInst2TimingParams
    )
    # Only cache if has delay
    # (dont need to check self delay, only submodules)
    if zero_added_clks_pipeline_map.zero_clk_max_delay is not None:
        _GET_ZERO_ADDED_CLKS_PIPELINE_MAP_cache[key] = zero_added_clks_pipeline_map
    else:
        # Sanity?
        if has_delay:
            # Seems to early catch designs optimizing away
            raise Exception(
                f"It looks like the function {zero_added_clks_pipeline_map.logic.func_name} reduces to constants/wires in an unexpected way? Missing '#pragma FUNC_WIRES {zero_added_clks_pipeline_map.logic.func_name}' ? "
            )
    return zero_added_clks_pipeline_map
class SubmoduleLevelInfo:
    """One sequential 'level' of logic inside a pipeline stage.

    A level first connects wires to wires, then instantiates submodules.
    """

    def __init__(self, level_num):
        self.level_num = level_num
        # Wire-to-wire connections come first in the level
        self.driver_driven_wire_pairs = []
        # Submodule instance connections come after
        self.submodule_insts = []

    def IS_EMPTY(self):
        """True when the level has no wire pairs and no submodules."""
        return not (self.driver_driven_wire_pairs or self.submodule_insts)
class StageInfo:
    """Per-pipeline-stage record: submodule outputs read this stage, then
    the ordered list of submodule levels making up the stage."""

    def __init__(self, stage_num):
        self.stage_num = stage_num
        # Output ports of submodules read at the start of this stage
        self.submodule_output_ports = []
        # Ordered SubmoduleLevelInfo entries for this stage
        self.submodule_level_infos = []
# This started off as just code writing VHDL
# Then the logic of how the VHDL was written was highjacked for latency calculation
# Then latency calculations were highjacked for logic delay calculations
class PipelineMap:
    """Describes how one logic instance's body maps onto pipeline stages,
    plus zero-added-clocks delay bookkeeping used for slicing decisions."""

    def __init__(self, logic):
        self.logic = logic
        # Any logic will have
        self.num_stages = 1  # Comb logic
        # New per stage info class
        self.stage_infos = []  # list of StageInfo
        # Wires and submodules of const network like another pipeline stage outside pipeline
        self.const_network_stage_info = None  # StageInfo
        # Helper list of wires part of const network just used during prop processes
        self.const_network_wire_to_upstream_vars = {}
        # Read only global wires (might be volatile or not)
        self.read_only_global_network_stage_info = None
        self.read_only_global_network_wire_to_upstream_vars = {}
        # DELAY STUFF ONLY MAKES SENSE TO TALK ABOUT WHEN:
        # - 0 CLKS
        # - >0 delay submodules
        # ITS NOT CLEAR HOW SLICES ACTUALLY DISTRIBUTE DELAY
        # Ex. 1 ns split 2 equal clks?
        # 0.3 | 0.3 | 0.3 ?
        # 0 | 1 | 0 ? Some raw VHDL is like this
        # Also once you are doing fractional stuff you might as well be doing delay ns
        # Doing slicing for multiple clocks shouldnt require multi clk pipeline maps anyway right?
        # HELP ME
        self.zero_clk_per_delay_submodules_map = {}  # dict[delay_offset] => [submodules,at,offset]
        self.zero_clk_submodule_start_offset = {}  # dict[submodule_inst] = start_offset # In delay units
        self.zero_clk_submodule_end_offset = {}  # dict[submodule_inst] => end_offset # In delay units
        self.zero_clk_max_delay = None
def __str__(self):
    """One line per delay offset: "<delay>: <sorted submodule insts>"."""
    lines = ["Pipeline Map:"]
    for delay in sorted(self.zero_clk_per_delay_submodules_map):
        insts = self.zero_clk_per_delay_submodules_map[delay]
        lines.append(f"{delay}: {sorted(insts)}")
    return "\n".join(lines)
def write_png(self, out_dir, parser_state):
try:
import graphviz
except:
return
s = graphviz.Digraph(
self.logic.func_name,
filename="pipeline_map.gv",
node_attr={"shape": "record"},
)
# Dont bother if more than 128 nodes...
if len(self.logic.submodule_instances) > 128:
return
s.graph_attr["rankdir"] = "LR" # Left to right ordering
# s.graph_attr['splines']="ortho" # Right angle lines...doesnt look right?
# SIZE IO + REGS NODES to be largest font
# Get average bit width (height of node)
smallest_font_pt = 14.0
def get_avg_bit_width(sub_inst, logic, parser_state):
sub_func_name = logic.submodule_instances[sub_inst]
sub_logic = parser_state.FuncLogicLookupTable[sub_func_name]
# See registers estiamte .log
input_ffs = 0
for input_port in sub_logic.inputs:
input_type = sub_logic.wire_to_c_type[input_port]
input_bits = VHDL.C_TYPE_STR_TO_VHDL_SLV_LEN_NUM(
input_type, parser_state
)
input_ffs += input_bits
output_ffs = 0
for output_port in sub_logic.outputs:
output_type = sub_logic.wire_to_c_type[output_port]
output_bits = VHDL.C_TYPE_STR_TO_VHDL_SLV_LEN_NUM(
output_type, parser_state
)
output_ffs += output_bits
return (input_ffs + output_ffs) / 2.0
# Bit width(height) scaling
MIN_AVG_BIT_WIDTH = 999999
MAX_AVG_BIT_WIDTH = 0
for sub_inst, sub_func_name in self.logic.submodule_instances.items():
sub_logic = parser_state.FuncLogicLookupTable[sub_func_name]
if sub_logic.delay is not None and sub_logic.delay > 0: # Skip zero delay
avg_bit_width = get_avg_bit_width(sub_inst, self.logic, parser_state)
if avg_bit_width < MIN_AVG_BIT_WIDTH:
MIN_AVG_BIT_WIDTH = avg_bit_width
if avg_bit_width > MAX_AVG_BIT_WIDTH:
MAX_AVG_BIT_WIDTH = avg_bit_width
# Delay (width) scaling
MIN_NON_ZERO_DELAY = 999999
MAX_DELAY = 0
for sub_inst, sub_func_name in self.logic.submodule_instances.items():
sub_logic = parser_state.FuncLogicLookupTable[sub_func_name]
if sub_logic.delay is not None and sub_logic.delay > 0: # Skip zero delay
if sub_logic.delay < MIN_NON_ZERO_DELAY:
MIN_NON_ZERO_DELAY = sub_logic.delay
if sub_logic.delay > MAX_DELAY:
MAX_DELAY = sub_logic.delay
by_eye_scale = 4.0 # By-eye const adjust...
max_font_pt = smallest_font_pt + (
(MAX_DELAY / MIN_NON_ZERO_DELAY) * by_eye_scale
)
for wire in self.logic.wires:
# Constants(Node)
if C_TO_LOGIC.WIRE_IS_CONSTANT(wire):
# TODO resolve to const str and manually add location on next line
val_str = C_TO_LOGIC.GET_VAL_STR_FROM_CONST_WIRE(
wire, self.logic, parser_state
)
s.node(wire, r"{ " + val_str + " | {<const> CONST}}")
# Inputs(Node)
if wire in self.logic.inputs:
s.node(
wire,
r"{ " + wire + " | {<in> IN}}",
**{"fontsize": str(max_font_pt)},
)
# Clock enable(Node)
if C_TO_LOGIC.LOGIC_NEEDS_CLOCK_ENABLE(self.logic, parser_state):
s.node(
C_TO_LOGIC.CLOCK_ENABLE_NAME,
r"{ " + C_TO_LOGIC.CLOCK_ENABLE_NAME + " | {<in> IN}}",
**{"fontsize": str(max_font_pt)},
)
# Outputs(Node)
if wire in self.logic.outputs:
s.node(
wire,
r"{ {<out> OUT} | " + wire + " }",
**{"fontsize": str(max_font_pt)},
)
# State regs
if wire in self.logic.state_regs:
# s.node(wire, r'{ <in> NEXT | '+wire+' | <out> NOW }')
s.node(
wire + "_in",
r"{ {<in> NEXT} | " + wire + " }",
**{"fontsize": str(max_font_pt)},
)
s.node(
wire + "_out",
r"{ " + wire + " | {<out> NOW} }",
**{"fontsize": str(max_font_pt)},
)
# Submodules/Nodes with ports
for sub_inst, sub_func_name in self.logic.submodule_instances.items():
# Need to lookup input ports, and output ports
# Location
# And total width of inputs/outputs for height
# width is based on delay of func logic
sub_logic = parser_state.FuncLogicLookupTable[sub_func_name]
inputs_text = ""
for input_port in sub_logic.inputs:
inputs_text += f"<{input_port}> {input_port}" + " |"
if C_TO_LOGIC.LOGIC_NEEDS_CLOCK_ENABLE(sub_logic, parser_state):
inputs_text += (
f"<{C_TO_LOGIC.CLOCK_ENABLE_NAME}> {C_TO_LOGIC.CLOCK_ENABLE_NAME}"
+ " |"
)
inputs_text = inputs_text.strip("|")
outputs_text = ""
for output_port in sub_logic.outputs:
outputs_text += f"<{output_port}> {output_port}" + " |"
outputs_text = outputs_text.strip("|")
func_name_text = sub_func_name
c_ast_node_coord = self.logic.submodule_instance_to_c_ast_node[
sub_inst
].coord
location_text = ( # str(os.path.basename(c_ast_node_coord.file)) + r'\n' +
"line "
+ str(c_ast_node_coord.line)
+ " "
+ "col. "
+ str(c_ast_node_coord.column)
)
avg_bit_width = get_avg_bit_width(sub_inst, self.logic, parser_state)
width = 1
height = 1
font_pt = smallest_font_pt
if sub_logic.delay is not None and sub_logic.delay > 0:
width = float(sub_logic.delay) / MIN_NON_ZERO_DELAY
font_pt += width * by_eye_scale
width *= by_eye_scale
height = float(avg_bit_width) / MIN_AVG_BIT_WIDTH
height /= by_eye_scale
ns = float(sub_logic.delay) / DELAY_UNIT_MULT
shape_text = f"{ns:.1f}ns x ~{avg_bit_width}bits"
s.node(
sub_inst,
r"{{"
+ inputs_text
+ r"} | "
+ func_name_text
+ r"\n"
+ location_text
+ r"\n"
+ shape_text
+ r"| {"
+ outputs_text