-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathplist.e
1915 lines (1869 loc) · 81.9 KB
/
plist.e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
--
-- plist.e
-- =======
--
-- Create a text listing file for the recently compiled program.
--
-- (key constants here are: dumpVM, dumpSymTab, doOneInclude, and
-- showlinetable)
--
-- This file is part of p.exw. It is invoked after compilation, when
-- -listing (or -d, -dump, -list) is specified on the command line.
-- (-c, -norun are implied. If you have a problem which only occurs
-- when interpreted but not when compiled you probably need to let
-- me deal with it.)
-- Update: "-d!" option added for "interpretative listing" of the
-- non-optimised code generated when interpreting.
--
-- Note: The listing file is designed to be human-readable; there is
-- no intention whatsoever of creating a file that can be assembled.
-- The .exe file is **not** created from list.asm, but vice-versa.
-- It has the .asm extension mainly because I already had a syntax
-- file (FASM.syn) for Edita. Note: asm does not have escape chars,
-- so eg "error name!=\"fred\"" is not coloured the same way in a
-- list.asm file as it would be in list.exw. This is not a bug,
-- except perhaps of the "don't use .asm for two things" kind.
--
-- The listing is arranged to match your source files, if you look
-- carefully at the memory locations you will see they are far from
-- contiguous. There are no data declarations anywhere, instead the
-- hll identifiers are used, and no existing assembler could manage
-- to disambiguate the numerous "i" of "for i=" fame which tend to
-- be scattered throughout hll code. That said, I have no problem
-- with you trying to do something clever with the listing file, I
-- just want to avoid (being accused of) misleading anyone.
--
-- The resulting text file may be useful for debugging purposes, but
-- that is not the primary goal. I use it more often to help modify
-- the compiler to generate better code.
--
-- The listing files are not an excuse for you to shout at me over
-- some poor quality code being emitted! If you think you can do
-- better, well, that is what the listing/source/etc is there for.
--
-- I would also urge you to use common sense, although perhaps I am
-- quite guilty of not doing that. In all probability, saving some
-- half-a-dozen bytes off some rare code fragment will never make
-- any real-world program any faster; in the worst case everyone
-- has a go and we end up with a zillion pointless edge cases for
-- no measurable gain, but a much slower compiler... Some of the
-- test programs have already been over-optimised to the point
-- where they don't really test anything anymore.
--
-- Some files, eg pdiag.e, may be shown as - skipped (without debug).
-- Obviously, if you want a listing remove the "without debug" in the
-- indicated source. I should note that you cannot "just print it out
-- anyway, with debug or not", because there is no LineTable, hence
-- there is simply no way to marry up source code with binary, nor
-- for that matter to determine how long the binary actually is.
-- Alternatively: edit DoWithOptions() and skip any OptDebug setting.
-- AlternativeII: always keep a lineTable, irrespective of K_wdb.
--
-- Just so you know, the last time I ran p.exe -list p.exw it created
-- a 10.5MB listing file 116,939 lines long, would probably need over
-- 1750 sheets/3 reams of A4 to print, and take longer to read than
-- War and Peace. The constant doOneInclude is an easy/recommended
-- way to keep listing sizes more manageable, -nodiag even better.
--
-- The "analysis" contains seven columns:
--
-- pairing: np, u, v, uv, plus vu, which is a uv that landed in v.
-- modified registers : \ eax=#01, ecx=#02, edx=#04, ebx=#08,
-- referenced registers : / esp=#10, ebp=#20, esi=#40, edi=#80
-- clocks,
-- cycle, (each block [/source line] starts at 1)
-- dependency, (instruction stalls waiting for prior to finish)
-- *AGI stall* (address generation interlock)
--
-- cycle is only meaningful for straightline code. For example, in
-- cmp [a1],h4 ; u 00 00 2 1
-- jl @f ; v 00 00 1 1
-- call opIchkFail ; v 00 00 1 3
-- @@: <next instruction> ; u 00 00 1 4
-- it should be pretty clear the disassembler has not "carried over"
-- the cycles from the jl to after the call opIchkFail even though
-- in this case it would be the more interesting number, and since
-- opIchkFail does not return, the cycle against @@: is useless.
-- (opIchkFail has since been replaced with opTcFail, btw.)
-- The above also shows an earlier bug in the implementation of
-- cycle - the jl should end in "1 2" as it pairs with the last
-- clock of the cmp, not the first. While this specific instance
-- has now been fixed (search for 10/05/08 in p2asm.e), I believe
-- non-1-clock instructions need more work (see "" ?9/0), though
-- Phix rarely emits non-1-clock instructions anyway.
-- Also note there is no memory dependency checking, eg it claims
-- mov edx,[a1]; mov [a1],ecx occur in the same cycle.
-- (update: psched.e now attempts to remove memory dependencies,
-- a long time before this program gets its mitts on the binary.)
-- (update2: psched.e is now long-time broken)
--
-- Hopefully the dependency and AGI columns are most often blank.
-- Do not overly trust this, though. Since as above I doubt that
-- cycle is properly implemented, AGI stalls may not always be
-- correctly predicted, unless the instruction clocks are all 1.
-- Similar to above, e92movti does not return, hence in eg:
-- cmp ecx,h4 ; uv 00 02 1 5
-- jl @f ; v 00 00 1 5
-- mov esi,#0040C16C (e) ; uv 40 00 1 6
-- mov edx,#0040C14C (e) ; vu 04 00 1 6
-- call e92movti ; v 00 00 1 7
-- @@: mov [ebp+edx*4+20],#0040B1CA ; uv 00 24 1 10 *04*
-- the AGI stall on edx as shown does not in fact occur, though
-- equally there could be one occurring which is not reported.
-- update: call now clears such dependency/agi stall reporting,
-- however any (rare) <modify reg>/jump/<use reg in addr> sets
-- make no attempt to indicate (possible) dependency/agi.
--
-- Let me know if you find anything else which is clearly wrong.
--
--
--DEV this should be a command line option??
constant dumpVM = 0
-- Well, I guessed someone would try it anyway, so I may as well
-- offer a helping hand. Set the above constant to 1 to create a
-- listing of the (closed source) VM - run eg "p p -d test" (you
-- could rebuild the compiler [p -cp] but why bother) & remember
-- to reset the above constant to zero when done.
--
-- There may be other (free) tools you can download that do a
-- better job, so this isn't really giving all that much away.
-- Reconstructing the data declarations is left as an exercise
-- for the reader ;-))
--
-- One thing you might do with such a file is grab the code for
-- a particular opcode, hack at pmain.e/pemit.e to generate a
-- replacement, and then either patch VMep[opXxxxxxx] or do an
-- isILa-style thing. No need to be afraid to ask for help, so
-- long as you are not spitefully trying to rip out licencing..
--
--DEV delete this protectionist codswallop!
-- (replace with something along the lines of:
-- I have no intention of releasing the asm sources of the
-- backend, instead preferring to migrate desired chunks to
-- hll/#ilasm code, as and when needed. I will gladly drop
-- any licencing code (specifically the "enforced" part of
-- the "enforced open source" concept) in return for help
-- with this migration, that is at the point when it becomes
-- 100% Phix open source code with no need for fasm etc.)
-- Fwiw, it is probably the trivia I am most protective of.
-- As far as I am concerned, any fool can write a block of code
-- to perform a particular task, it is the subtleties between that
-- count. As an example, the pertinent address after a file full
-- error is at [esp+36]. Hundreds of similar cases represent, at
-- least in my eyes, real value, months if not years of effort
-- (such work is considered incomplete and ongoing, btw). If you
-- can actually name something you want to experiment with then
-- it is probably something I would be comfortable releasing.
-- (abeit perhaps with somewhat watered-down error handling)
-- While admittedly I have not yet finalised or tested the means
-- of making this a practical reality, it goes something like:
--
-- * Set dumpVM to 1, rebuild via p -cp, then compile any
-- program under -d, and open the list2.asm file just
-- created. You will need to reset dumpVM to 0 and
-- reuild again for the final phase.
-- * Locate the code you want to experiment with. I may be
-- able to point you in the right direction, or someone
-- else may have worked in this area and left you an
-- already half-finished present. See note below.
-- * Hack the code into #ilasm format. For an example, see
-- the one in pttree.e: that was itself hacked from a
-- listing of pttree.e, of course! During this process,
-- be prepared to add the odd new opcode to init_il().
-- * Create/save a ttidx (or two) for the opcode(s) under
-- attack, say in init_il(), for matching against a
-- label in your #ilasm code - remember the routine
-- number and offset in DoRoutineDef().
-- * Modify pemit.e to hook your new code where the old
-- opcode was. Use p -d test and inspect the resulting
-- list.asm until everything is perfect. This is the
-- trickiest part and always involves some trial and
-- error. It makes a great deal of sense to first get
-- everything working as-is, before attempting any
-- modifications to the given assembly code.
-- * Note that some error handling, unassigned variables
-- in particular, relies on exception handling which
-- is not available to #ilasm constructs. If needed,
-- add explicit tests and calls to opRTErn (examples
-- in pprntf.e). This is of course expected to change
-- when (/if) proper exception handling is supported
-- in the hll language, in which case exceptions that
-- occur within a #ilasm construct would probably be
-- caught via a surrounding hll try/catch statement.
--
-- Once you are happy with your modifications, submit them to
-- me for inclusion in the official root. When I have time, I
-- will (provided I agree with them) ship them in pilasm.e,
-- as per the comments at the top of that non-compilable and
-- unsupported file, for the basis of future experimentation.
--
--
-- The listing may also be of some minor help if you get a fatal
-- exception in the range #403000 to #40A000 (the VM is supposed
-- to trap any that can occur in that range).
--
--
-- Possible future plans:
-- ======================
--
-- Improved notion of integer variables and intermediates.
-- Generally speaking the most gains can be found by detecting more
-- and more things (variables) that are short integer. At the moment
-- even i=j+k+l causes problems because it does not know that (j+k)
-- is an integer, ie/eg in #3F+#3F+(-#3F) all the components and the
-- result are integer, but there is an intermediate which is not.
-- By "problems" I just mean i=j+k+m is slower than i=j+k i+=m;
-- a test program gives 4.03:1.37[!], on RDS I get 4.65:3.12.
--
-- Smarter type handling. At the moment the compiler has a very static
-- view of whether something is an integer or not. It ought to be able
-- to do something like this: [DEV: done, a while back: see pltype.e]
--
-- object x
-- if atom(x) then -- mark x as atom
-- if integer(x) then -- mark x as integer
-- end if -- reset x as atom
-- else -- mark x as sequence (not atom)
-- if string(x) then -- mark x as string
-- else -- mark x as dword_seq (sequence but not string)
-- end if -- reset x as sequence
-- end if -- reset x as object
--
-- Seems to require multiple (6) overlapping chains, possibly more.
-- I doubt there is much gain in the (even) more complex model:
--
-- object x
-- if cond1 and atom(x) and cond2 then
-- -- treat x as atom here
-- else
-- -- back to treating x as object here
-- -- ie: saying x is "not atom" here would be wrong.
-- end if
--
-- While "integer" is the most important, there is also the
-- potential to avoid typechecking:
--
-- object x
-- atom y
-- if atom(x) then
-- y=x
-- end if
--
-- Obviously this applies to udts as much as builtin types, as long as
-- the udt has no side effects (not currently tested/able for).
--
--
-- Improved notion of 4-byte-per-element sequences. At the moment strings
-- get special treatment because they cannot be dword-sequences. If
-- the compiler could detect sequences which are "not string" better
-- then it could apply similar treatment to them.
-- Update: while significant gains have been seen in the micro-benchmarks,
-- they have not carried through to larger programs. The T_sequence/string
-- gain has mostly been dropped.
--
-- Sequence of integer handling. If the compiler could detect that some
-- sequences only ever had integer elements, it could optimise better.
-- I am actually talking about making the compiler smarter rather than
-- implementing an "of" keyword in the language and throwing typechecks
-- at runtime here. The latter is a big job and a separate issue - I'd
-- personally rather focus on the handling of legacy code, eg win32lib,
-- at least in the first instance.
--
-- Trivial function inlining. If the code actually being invoked is
-- smaller than the opFrame/Move/Call currently emitted, then it
-- should be wiser just to duplicate the binary. Of course this
-- is only possible for previously declared routines and probably
-- too difficult except for at most single parameter routines.
-- We could easily avoid calling procedures with null bodies.
-- We should not need to worry about non-leaf or recursive routines
-- since they would immediately fail the size test, which means we
-- also need not worry about using the existing parameters and/or
-- return var - there is no way they could be active, though we
-- might want to direct store rather than go via the return var.
-- It may also be necessary to restructure the compiler so that
-- some of the work carried out by scanforShortJmp and blurph
-- in pemit.e is done earlier, for the length() tests.
--
-- Peephole optimisation. The routines emitHex1..emitHex6 in pemit.e
-- are currently utterly ignorant of what passes through them.
-- They could buffer the output, accept additional parameters to
-- detail what is being affected (ie/eg what registers are being
-- modified and what registers are being referenced, what the
-- registers actually contain, and/or pairing, clocks, etc) and
-- use this to generate improved code. I would say that generally
-- you are perfectly free to blur the boundary of code emitted by
-- one line and the next, as long as calls to opCodes are kept in
-- order and the LineTab covers the opCodes, eg in
-- call opCode1 -- from line 17
-- mov [bluh],eax -- from line 17, code point A
-- mov edi,[bleh] -- from line 18, code point B
-- call opCode2 -- from line 18, code point C
-- Then the LineTab entry for line 18 can be at A, B or C, and
-- A and B can be switched if desired. This even holds true for
-- the opLnt/Lnp/Lnpt opcodes. Note that this program (alone?)
-- demands that LineTab entries fall on instruction boundaries,
-- it may be possible to relax that rule if necessary.
-- Of course if you change the distance between A and C then the
-- LineTab must be updated to match.
-- Note that many opcodes have strange rules, which may hamper
-- things, such as "NB p1 obtained from [esp]-15 on error".
-- Some of the worst offenders at this could be changed; I have
-- recently changed opTcFail for a very similar reason.
-- As far as I have thought this through, I think the best plan
-- would be for the emitHex() routines to slap code into s5 as
-- they do now, but maintain a mini table of {offset,length},
-- and pepper pmain.e with HexFlush() or something.
--
-- Further to the above, it may make sense to emit "long" forms
-- of certain instructions rather than the shorter eax-specifics,
-- eg see loadReg(), ie #8B 05 xx xx xx xx works just as well as
-- #A1 xx xx xx xx for mov eax,[N]. The point of doing this would
-- be to effectively throw reg-agnostic code at the peepholer,
-- which could then "rename" register use when it spots that var
-- x needs to be in (say) edi later on. Note however that packing
-- such back to short eax form might prove difficult as there is
-- no isDead-1-byte marker and it may prove more costly to add one
-- than this gains, though if just leaving the #8B05's as is gets
-- us a measureable gain (in say p.exe rather than just a micro-
-- benchmark) then we'll gladly take it. I must admit that the
-- more I think of this, the more impractical it all seems, but
-- after all it is just a suggestion.
--
-- Second stage. Either alternatively or in addition to peephole,
-- and this is similar to what scanforShortJmp()/blurph() in
-- pemit.e do, after an s5 is complete, possibly with some idea
-- of improved type info, desires for loop hoisting, etc the
-- s5 bytes can be copied one by one fairly quickly/easily to
-- a new improved s5. As a quick example, for i=1 to 1000 do
-- k=j end for does not want to test j is initialised every
-- iteration, better to (say) opIchk it once at the start,
-- so somewhere you build a list of code insertions that you
-- realised were wanted a tad too late to actually emit in
-- the first pass, plug them in during this second stage.
-- A simpler approach might be to plant a modest isDead block
-- at the top of each routine or loop, bolt the insert code
-- onto the very end of the s5 block, and patch that isDead
-- to a jmp.
--
-- Register allocation (probably linear scan) might be rather nice.
-- You should assume that calling an opcode trashes all registers,
-- unless it says otherwise somewhere. See also the comment about
-- "long" forms above.
--
-- Refless temps. There are some places, most noticeably conditions,
-- where an incref is unnecessary. Note that in say a=append(a,tmp)
-- it is critical that tmp has a refcount - if it happened to be,
-- for instance, a copy of a without an incref, then opApnd could
-- create an illegal circular reference (iirc, opApnd does test
-- for p2!=p3, but does not also test for [p2]!=[p3], instead
-- it relies on the refcount to avoid that and other cases).
-- Example: "if a[3]=x" has no need whatsoever to incref(a[3]).
-- There are some wierd cases such as "if a[3]=modifya3() then",
-- which do not particularly bother me except that if it is going
-- to crash, it had better behave consistently when interpreted
-- and compiled, ie/and/or use normal incref'd tmps over function
-- calls etc. It is of course only the common/simple cases that
-- we really need to care about.
--
-- With 20/20 hindsight, I realise that opJnotx and opJifx are
-- actually a specific implementation of this and could perhaps
-- be removed once this is in place (subject to timing tests).
--
-- Also note there is a temp ref optimisation already in place:
-- p(s[1])
-- makes a ref-counted copy of s[1] in a temp, then ParamList
-- moves it into the parameter of p() and zeroes it, avoiding
-- the incref/decref one stage later. Any change in this area
-- should not upset that (for more details see emitHexMov).
--
-- Deallocate all temps and function results asap. Particularly in
-- the case of a function result, it is decref/deallocated at
-- each return statement. It might be much better to do it once
-- at the start. There are almost certainly similar situations
-- with other tmps, but as the previous paragraphs explained it
-- can all get a bit fuzzy.
--
--[DONE, I THINK:]
-- Log presence of possible forward calls to improve gvar handling.
-- Consider the following listing fragment:
--
-- ; 33 atom ai, af
-- ; 34 ai=1
-- xor ecx,ecx ;#0040B000: 063311 uv 02 02 1 1
-- mov edx,[#0040C2B4] (ai) ;#0040B002: 213025 B4C24000 vu 04 00 1 1
-- mov [#0040C2B4] (ai), dword 1 ;#0040B008: 307005 B4C24000 01000000 u 00 00 1 2
-- cmp edx,h4 ;#0040B012: 201372 00000040 vu 00 04 1 2
-- jle #0040B026 ;#0040B018: 176 0C v 00 00 1 3
-- sub dword [ecx+edx*4-8],1 ;#0040B01A: 203154221 F8 01 u 00 06 3 4
-- jne #0040B026 ;#0040B01F: 165 05 v 00 00 1 6
-- call deallocX ;#0040B021: 350 1683FFFF v 00 00 1 7
--
-- At first it seems clear that the cmp edx,h4 .. call deallocX will never
-- trigger and just mov [ai],1 ought to be enough. However there may be a
-- forward call to a routine which sets ai to 1.1+1.2, hence removing the
-- cmp edx,h4 etc could cause a memory leak since that 2.3 would never be
-- freed. Note that tvars have much tighter scope rules which mean that a
-- similar situation is simply not possible. There is some code towards
-- the end of Assignment() which does the above optimisation for tvars,
-- look for elsif ExitBP=-1 and TableEntry<0 ... onDeclaration=1.
-- The proposal (for similar handling of gvars) is kept simple:
-- Once any possible forward calls are detected, we have to dealloc:
--
-- fwd_routine()
-- atom a1, a2
-- a1=1 -- need h4
-- procedure fwd_routine().... end procedure
-- a2=1 -- omit h4 if S_set bit clear
--
-- In other words once all possible forward calls have been resolved, then
-- we can revert to omitting the provably unnecessary code. In the first
-- instance I would suggest maintaining a count of things we threw on a
-- backpatch list less how many we backpatched, but there is clearly
-- something pertaining to routine_id which at the moment escapes me,
-- possibly a list of routine names to look out for with a "-1" thing
-- for the routine_id("do"&Name[idx]) cases, and something else ontop
-- for "integer r_Proc ... call_proc(r_Proc,{}) ... r_Proc=r_id()".
-- AHA: the r_Proc case is "forever" - which is what I was missing.
-- Probably the trickiest thing would be to spot/ignore forward calls
-- from fallow code, however if you get the called/defined bounds right
-- then you can probably rely on the programmer moving gvar assignment
-- out of such ranges.
--
-- Note: This is probably precisely the sort of "gotcha" that accounts
-- for a certain person's notorious reluctance to permit forward calls,
-- and/or yield -1 for routine_id("fwd_routine") until after the
-- actual routine definition. It is probably true that "atom a1=1" and
-- "atom a1 a1=1" already exhibit a problem of this nature and require
-- a fix along these lines to avoid it, in TopDecls().
--
--DEV: newBase killed this, I think...
-- Detect prepend use and propagate it (eg if b=a occurs after a=prepend
-- then treat as if b=prepend had also occurred). Also slices that do
-- not start with a literal 1. Variables not affected in any way by
-- such operations would not have to load the base (search pmain.e
-- for "-20]"). Not sure any gain would be even measurable.
--
-- Overlapped typechecks, eg (currently, 4 clocks):
-- procedure check5(integer a1, integer b2)
-- mov eax,[a1]
-- cmp eax,h4
-- jl @f
-- mov edx,a1
-- call opTcFail
-- @@:
-- mov ecx,[b2]
-- cmp ecx,h4
-- jl @f
-- mov edx,b2
-- call opTcFail
-- @@:
-- ...
-- call opRetf
--
-- should be slightly faster (3 clocks) as:
-- procedure check5(integer a1, integer e5)
-- mov eax,[a1]
-- mov ecx,[b2]
-- cmp eax,h4
-- jl @f
-- mov edx,a1
-- call opTcFail
-- @@:
-- cmp ecx,h4
-- jl @f
-- mov edx,b2
-- call opTcFail
-- @@:
-- ...
-- call opRetf
--
-- or even (also 3 clocks, but less "cold code" in the cache):
-- procedure check5(integer a1, integer e5)
-- L0:
-- mov eax,[a1]
-- mov ecx,[b2]
-- cmp eax,h4
-- jge L1
-- cmp ecx,h4
-- jge L2
-- ...
-- call opRetf
-- L1:
-- mov edx,a1
-- jmp @f
-- L2:
-- mov edx,b2
-- @@:
-- push L0 ;; fake return addr
-- jmp opTcFail
--
-- I have to admit this is possibly alot of work for small gain.
--
-- [ED: Erm, that is enough for now.] I trust you've gotten the general idea
-- by now that the possibilities are boundless.
--
constant dumpSymTab = 01,
doOneInclude = 0, -- NB "without debug" prevents listing, see above
-- doOneFileName = "ptok.e"
-- doOneFileName = "pmain.e"
-- doOneFileName = "pemit.e"
-- doOneFileName = "machine.e"
-- doOneFileName = "plist.e"
-- doOneFileName = "pdiag.e"
-- doOneFileName = "pttree.e"
-- doOneFileName = "pgscan.e"
-- doOneFileName = "pilxl.e"
-- doOneFileName = "p.exw"
-- doOneFileName = "eafonts.ew"
-- doOneFileName = "p2asm.e"
-- doOneFileName = "psym.e"
-- doOneFileName = "pmsgs.e"
-- doOneFileName = "pgets0.ew"
-- doOneFileName = "pltype.e"
-- doOneFileName = "pilx86.e"
doOneFileName = "t01type.e",
showlinetable = 0
global sequence vmap -- variable map; var address --> offset into threadstack
-- (a flat array of all static and dynamic var refs)
-- ==> index into symtab for var name, type, etc.
global sequence codeTable, codeIdx
global integer outFrame = 0
global integer inFrame = 0
include p2asm.e -- dissassembler engine
integer listfn -- main xxx.asm output file
atom base1, -- effective address of cs[1]
tIL, -- effective address of current top_level_sub
sIL -- effective address of current subroutine
object si -- copy of symtab[i], speedwise
integer fNo -- scratch (copy of symtab[i][S_FPno])
string name, -- scratch (copy of filenames[i][2])
path -- scratch (copy of filepaths[f[i][1]])
sequence ctrl -- sorted table of routine entries:
constant C_Fno = 1, -- file no (idx to filetab, allfiles, etc) (from symtab[i][S_FPno])
C_lNo = 2, -- first line number (from symtab[i][S_1stl])
C_sti = 3, -- symtab index
C_wdb = 4 -- modified K_wdb flag
integer cidx -- idx to ctrl
sequence ci -- copy of ctrl[cidx]
integer newBlock
newBlock = 1
constant ALIGNASM = 1
function findboth(string d1word, string tjword, sequence stringset)
return (find(d1word,stringset) and find(tjword,stringset))
end function
-- 29/5/14:
procedure alignasm(sequence dres, string tj)
string dres1 = dres[1]
integer ch
integer d1pos=-1, tjpos=-1
string d1word="", tjword=""
string fmt = " %-36s ;#%08x: %-26s %s\n"
for i=1 to length(dres1) do
ch = dres1[i]
if ch!=' ' then
d1pos = i
d1word = dres1[i..find(' ',dres1,i+1)-1]
exit
end if
end for
for i=1 to length(tj) do
ch = tj[i]
if ch='#' and match("#ilASM{",tj)=i then
tj[i..i+6] = " "
ch = ' '
end if
if ch!=' ' then
tjpos = i+4
tjword = tj[i..find(' ',tj,i+1)-1]
if length(tjword) and tjword[$]='\n' then
tjword = tjword[1..$-1]
end if
exit
end if
end for
--DEV if we findboth(), why not substitute into dres?
if length(d1word)
and (d1word=tjword or
-- {d1word,tjword}={"call","testN()"} or
findboth(d1word,tjword,{"je","jz"}) or
findboth(d1word,tjword,{"jne","jnz"}) or
findboth(d1word,tjword,{"jb","jc","jnae"}) or
findboth(d1word,tjword,{"jae","jnb","jnc"}) or
findboth(d1word,tjword,{"ja","jnbe"}) or
findboth(d1word,tjword,{"jbe","jna"}) or
findboth(d1word,tjword,{"jge","jnl"}) or
findboth(d1word,tjword,{"jle","jng"}) or
findboth(d1word,tjword,{"jl","jnge"}) or
findboth(d1word,tjword,{"jg","jnle"}) or
findboth(d1word,tjword,{"jp","jpe"}) or
findboth(d1word,tjword,{"jnp","jpo"}) or
findboth(d1word,tjword,{"sete","setz"}) or
findboth(d1word,tjword,{"setne","setnz"}) or
findboth(d1word,tjword,{"setb","setc","setnae"}) or
findboth(d1word,tjword,{"setae","setnb","setnc"}) or
findboth(d1word,tjword,{"seta","setnbe"}) or
findboth(d1word,tjword,{"setbe","setna"}) or
findboth(d1word,tjword,{"setge","setnl"}) or
findboth(d1word,tjword,{"setle","setng"}) or
findboth(d1word,tjword,{"setl","setnge"}) or
findboth(d1word,tjword,{"setg","setnle"}) or
findboth(d1word,tjword,{"setp","setpe"}) or
findboth(d1word,tjword,{"setnp","setpo"}) or
findboth(d1word,tjword,{"lea","mov"}))
and tjpos>d1pos then
if not find(tjword,{"lea","mov"}) then
dres[1][d1pos..d1pos+length(d1word)-1] = tjword
end if
d1word = repeat(' ',tjpos-d1pos)
puts(listfn,d1word)
d1pos = match(d1word,dres1,d1pos)
if length(dres1)<=36-length(d1word) then
fmt = sprintf(" %%-%ds ;#%%08x: %%-26s %%s\n",36-length(d1word))
else
fmt = " %s ;#%08x: %-26s %s\n"
end if
end if
printf(listfn,fmt,dres)
end procedure
function is_data(atom v)
return (v>=ImageBase2+BaseOfData2 and v<=ImageBase2+BaseOfData2+SizeOfData2)
end function
--if newEmit then
-- r_isdata = routine_id("is_data")
--end if
function is_code(atom v)
--if v=4202626 then
-- ?{v,ImageBase2,BaseOfCode2,SizeOfCode2}
--end if
return (v>=ImageBase2+BaseOfCode2 and v<=ImageBase2+BaseOfCode2+SizeOfCode2)
end function
--if newEmit then
-- r_iscode = routine_id("is_code")
--end if
--with trace
--DEV temp (22/2/15)
procedure disassemble(integer eaddr, integer fromoffset, integer tooffset, string tj)
--procedure disassemble(integer eaddr, atom fromoffset, atom tooffset, string tj)
sequence dres, dres1
integer last, ro,rc,so,sc
atom ctrl1
integer machine
--trace(1)
if newBlock then
--DEV need to set the machine!
if newEmit then
r_isdata = routine_id("is_data")
r_iscode = routine_id("is_code")
machine = 32
if X64 then
machine = 64
end if
--printf(1,"disassemble(eaddr=%08x,fromoffset=%08x,tooffset=%08x)\n",{eaddr,fromoffset,tooffset})
decodeinit(eaddr+fromoffset, eaddr-base1+fromoffset,machine,iff(PE?arch_PE:arch_ELF))
else
decodeinit(eaddr+fromoffset, eaddr-base1+fromoffset)
end if
newBlock = 0
end if
last = eaddr+1+tooffset
if dumpVM then
ctrl1 = ctrl[1][1]
--?ctrl1
end if
while addr<last do
if dumpVM then
if addr=ctrl1 then
--DEV always true?
-- if string(ctrl[1][2]) then
dres = ctrl[1][2]
printf(listfn,"%s::\n;-%s\n",{dres,repeat('-',length(dres))})
-- end if
--DEV should this be done at the ctrl = sort(ctrl) stage?
-- while 1 do
ctrl = ctrl[2..length(ctrl)]
-- if length(ctrl)=0 then exit end if
-- if string(ctrl[1][2]) then exit end if
-- end while
if length(ctrl) then
ctrl1 = ctrl[1][1]
end if
end if
dres = decode() -- {asm,addr,hex,analysis}
else
dres = decode() -- {asm,addr,hex,analysis}
--DEV newEmit:
--DEV oops:
--if X64=0 then
--if wasOpCode=opCallOnce then trace(1) end if
if (wasOpCode=opCallOnce or wasOpCode=opFrame or wasOpCode=opTchk)
and (lastmov>1 and lastmov<=length(symtab))
and sequence(symtab[lastmov]) then
si = symtab[lastmov]
if wasOpCode=opCallOnce then
fNo = si[S_FPno]
if fNo=0 then
puts(1,"warning: fNo is 0, plist.e line 721\n")
name = "*** fNo is 0 ***"
else
name = filenames[fNo][2]
end if
--23/4/15:
--6/1/17:
-- elsif si[S_Name]=-1 then
elsif integer(si[S_Name]) then
-- added 1/5/15: (for the "call :%opFrame" in pDiagN.e/:!diagFrame, where edx got set in pemit2.e)
--8/7/15:
--if trim(tj)="call :%opFrame" then
if match("call :%opFrame",trim(tj))=1 then
name = ""
else
--printf(1,"warning: symtab[%d][S_Name] is -1, plist.e line 728\n",{lastmov})
printf(1,"warning: symtab[%d][S_Name] is %d, plist.e line 728\n",{lastmov,si[S_Name]})
?tj
-- name = "??-1??"
name = sprintf("??%d??",si[S_Name])
end if
else
name = si[S_Name]
if wasOpCode=opFrame then
inFrame = lastmov
lastmov = 0 -- added 4/11/14. (nowt specific, just think we should keep this 0 as much as possible. Main mod today was in p2asm.e)
end if
end if
if length(name) then
dres[1] &= " ("&name&")"
end if
wasOpCode = 0
resetmods()
end if
--end if
dres1 = dres[1]
if length(dres1)>36 then -- shorten if possible:
so = match("dword #",dres1)
if so then
dres1[so..so+5] = ""
-- dres1 = dres1[1..so-1] & dres1[so+6..length(dres1)]
dres[1] = dres1
end if
end if
if length(dres1)>36 then -- shorten if possible:
so = find('[',dres1)
sc = find(']',dres1)
ro = find('(',dres1)
rc = find(')',dres1)
if ro=sc+2 and sc>so and rc>ro then
-- convert eg mov [#0040C164] (res:useFlatString), dword 0
-- to mov [res:useFlatString], dword 0
if rc<length(dres1) and dres1[rc+1]=',' and dres1[sc+1]=' ' then
-- "[xxx] (yyy)," -> "[xxx]](yyy),"; pretend xxx 1 longer.
sc += 1
dres1[sc] = ']'
end if
dres1 = dres1[1..so] -- ...[
&dres1[ro+1..rc-1] -- xxx of (xxx)
-- &dres1[ro..rc] -- (xxx)
&dres1[sc..ro-1] -- ]..(-1
&dres1[rc+1..length(dres1)] -- )+1..$
dres[1] = dres1
elsif ro=16 and rc>ro and equal(dres1[1..6],"call #") then
-- convert eg call #0040383A (opFrame) (sq_floor_div)...
-- to call opFrame (sq_floor_div)...
dres1 = dres1[1..5] -- "call "
&dres1[ro+1..rc-1] -- xxx of (xxx)
&dres1[rc+1..length(dres1)] -- )+1..$
dres[1] = dres1
elsif ro=36 and rc>ro and equal(dres1[1..26],"mov [ebp+edx*4+20], dword ") then
-- convert eg mov [ebp+edx*4+20], dword 004038F0 (opRetf)
-- to mov [ebp+edx*4+20], opRetf
dres1 = dres1[1..20] -- "mov [ebp+edx*4+20], "
&dres1[ro+1..rc-1] -- xxx of (xxx)
-- &dres1[ro..rc] -- (xxx)
&dres1[rc+1..length(dres1)] -- )+1..$
dres[1] = dres1
end if
end if
end if
if ALIGNASM then
alignasm(dres, tj)
else
printf(listfn," %-36s ;#%08x: %-26s %s\n",dres)
end if
if terminal then
puts(listfn,"*** ERROR: decode of this segment aborted ***\n")
puts(1,"*** ERROR: decode of this segment aborted ***\n")
-- if getc(0) then end if
addr = last -- suppress message below
exit
end if
-- if addr>=last then exit end if
---- printf(1,"%08x %-22s %s\n",{addr,hex,asm})
--flush(listfn)
end while
if addr!=last then
-- if addr>last then -- no help last time I tried...
puts(listfn,"*** ERROR: decode of this segment ended at wrong address ***\n")
puts(1,"*** ERROR: decode of this segment ended at wrong address ***\n")
-- if getc(0) then end if
end if
end procedure
--global --temp, for pemit2 tests
function unpacklt(sequence linetab)
integer skip = 0, byte
atom word, dword
sequence res = {}
integer base = 0, tmp
for i=1 to length(linetab) do
if skip then
skip -=1
else
byte = linetab[i]
if byte>#7F then
if byte>#81 then
res = append(res,byte-#100)
elsif byte=#81 then
word = linetab[i+1]*#100+linetab[i+2]
if word>#7FFF then
word -= #10000
end if
res = append(res,word)
skip = 2
elsif byte=#80 then
dword = linetab[i+1]*#1000000+linetab[i+2]*#10000+linetab[i+3]*#100+linetab[i+4]
if dword>#7FFFFFFF then
dword -= #100000000
end if
res = append(res,dword)
skip = 4
else
?9/0
end if
else
res = append(res,byte)
end if
end if
end for
for i=1 to length(res) do
if res[i]>0 then
tmp = base
base += res[i]
res[i] = tmp
end if
end for
return res
end function
sequence slt, -- LineTab for next subroutine to be processed
tlt -- LineTab for current top_level_sub
integer sltidx, -- index into slt
tltidx -- index into tlt
integer lti -- scratch (copy of xlt[xltidx])
--, name, text,
integer slNo, -- line no in subroutine to look out for
tlNo -- line no in top_level_sub to look out for
integer slO, -- subroutine code offset next to be processed
tlO -- toip_level_sub code offset next to be processed
integer newSub
procedure getSub(integer i)
--
-- Process the next ctrl entry. If it is a routine in file i, set up
-- slt, slNo, sltidx, etc. (If it is not "" it will be handled at
-- the top of the for i=1 to length(allfiles) loop, ie tlt, tlNo,
-- tltidx, etc will be set up from it).
--
integer slsi -- scratch (idx to symtab)
newSub = 1
newBlock = 1
cidx += 1
slNo = -1
if cidx<=length(ctrl) then
ci = ctrl[cidx]
if ci[C_Fno]=i then
slsi = ci[C_sti] -- symtab index
si = symtab[slsi]
outFrame = slsi
slt = si[S_ltab]
--4/11/19: (structs)
if slt!={} then
sIL = si[S_il]
if newEmit then
if listing!=-1 then
slt = unpacklt(slt)
end if
while length(slt)>1 and slt[$]<0 do
printf(1,"oops, slt[$]<0 (symtab[%d])\n",slsi) --DEV
slt = slt[1..-2]
end while
if not newEmit or listing!=-1 then -- 27/2/15
sIL += BaseOfCode2+ImageBase2
end if
end if
slNo = ci[C_lNo]
lti = slt[1]
sltidx = 1
if equal(slt,{-2}) then
-- slNo = -1 -- no good
getSub(i)
else
-- if lti<0 then
if lti<0 and length(slt)>=2 and slt[2]=0 then
slNo -= lti
sltidx = 2
end if
end if
slO = 0
end if
end if
end if
end procedure
constant tdesc={"S_Const","S_GVar","S_TVar","S_Nspc","S_Rsvd","S_Type","S_Func","S_Proc"}
integer siState -- scratch (copy of symtab[i][S_State]) \
--string sState -- human readable version of siState / Also used for filenames, paths, S_vtype, S_Efct, etc.
sequence sState -- human readable version of siState / Also used for filenames, paths, S_vtype, S_Efct, etc.
procedure stateDesc(sequence txt, integer bit)
if and_bits(siState,bit) then
if length(sState)>1 then sState &= '+' end if
sState &= txt
siState -= bit
end if
end procedure
function namestring(object name)
if not string(name) then
name = sprint(name)
end if
return name
end function
constant day = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"}
--with trace
global procedure dump_listing()
integer siNTyp, -- scratch (copy of symtab[i][S_NTyp])
lNo, -- scratch (copy of symtab[i][S_1stl])
tlsi, -- scratch (idx to symtab)
X, -- scratch, modified and_bits(symtab[i][S_State],K_wdb)
c, -- scratch, var no used to build varmap
k, -- scratch, used for printing S_gInfo
putafter, -- flag: use hll code to split tls and subroutine code...
vmax,
wasRtnLit,
wasNoClr
sequence etxt -- copy of allfiles[i]
string outfile, -- scratch (list[2].asm)
tj, -- scratch (copy of etxt[j])
-- ptxt, -- scratch (printed text)
options -- "-d!"|("-d"[" -nodiag"])
sequence ptxt
object gi -- for printing S_gInfo
atom gi2,gi3 -- ""
integer sidx, last0 -- for new si[i..j]=0 loop
sequence d -- for generated date
sequence wasopNames -- for testall
integer offset
sequence vm_names
string vm_part
integer vm_len
integer sii
string file1
--trace(1)
if dumpVM then
outfile = mainpath&"list2.asm"
else
outfile = mainpath&"list.asm"
end if
printf(1,"creating listing file %s...",{outfile})
listfn = open(outfile,"w")
if listfn=-1 then