forked from cil-project/cil
-
Notifications
You must be signed in to change notification settings - Fork 20
/
cil.ml
executable file
·6953 lines (6107 loc) · 262 KB
/
cil.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(*
Copyright (c) 2001-2003,
George C. Necula <necula@cs.berkeley.edu>
Scott McPeak <smcpeak@cs.berkeley.edu>
Wes Weimer <weimer@cs.berkeley.edu>
Ben Liblit <liblit@cs.berkeley.edu>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The names of the contributors may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
open Escape
open Pretty
open Cilint
(* open Trace (\* sm: 'trace' function *\) *)
module E = Errormsg
module H = Hashtbl
module IH = Inthash
(*
CIL: An intermediate language for analyzing C progams.
Scott McPeak, George Necula, Wes Weimer
*)
(* The module Cilversion is generated automatically by Makefile from
information in configure.in *)
let cilVersion = Cilversion.cilVersion
type cstd = C90 | C99 | C11
let cstd_of_string = function
| "c90" -> C90
| "c99" -> C99
| "c11" -> C11
| _ -> failwith "Not a valid c standard argument."
let cstd = ref C99
let gnu89inline = ref false
let c99Mode () = !cstd <> C90 (* True to handle ISO C 99 vs 90 changes.
c99mode only affects parsing of decimal integer constants without suffix
a) on machines where long and long long do not have the same size
(e.g. 32 Bit machines, 64 Bit Windows, not 64 Bit MacOS or (most? all?) 64 Bit Linux):
giving constants that are bigger than max long type long long in c99mode vs. unsigned long
if c99mode is off.
b) for constants bigger than long long producing a "Unimplemented: Cannot represent the integer"
warning in C99 mode vs. unsigned long long if c99mode is off. *)
(* Set this to true to get old-style handling of gcc's extern inline C extension:
old-style: the extern inline definition is used until the actual definition is
seen (as long as optimization is enabled)
new-style: the extern inline definition is used only if there is no actual
definition (as long as optimization is enabled)
Note that CIL assumes that optimization is always enabled ;-) *)
let oldstyleExternInline = ref false
let makeStaticGlobal = ref true
let useLogicalOperators = ref false
let useComputedGoto = ref false
let useCaseRange = ref false
module M = Machdep
(* Cil.initCil will set this to the current machine description.
Makefile.cil generates the file src/machdep.ml,
which contains the descriptions of gcc and msvc. *)
let envMachine : M.mach option ref = ref None
let lowerConstants: bool ref = ref true
(** Do lower constants (default true) *)
let removeBranchingOnConstants: bool ref = ref true
(** Remove branches of the form if(const) ... else ... (default true) *)
let insertImplicitCasts: bool ref = ref true
(** Do insert implicit casts (default true) *)
let little_endian = ref true
let char_is_unsigned = ref false
let underscore_name = ref false
type lineDirectiveStyle =
| LineComment (** Before every element, print the line
number in comments. This is ignored by
processing tools (thus errors are reproted
in the CIL output), but useful for
visual inspection *)
| LineCommentSparse (** Like LineComment but only print a line
directive for a new source line *)
| LinePreprocessorInput (** Use #line directives *)
| LinePreprocessorOutput (** Use # nnn directives (in gcc mode) *)
let lineDirectiveStyle = ref (Some LinePreprocessorInput)
let print_CIL_Input = ref false
let printCilAsIs = ref false
let lineLength = ref 80
let warnTruncate = ref true
(* sm: return the string 's' if we're printing output for gcc, suppres
it if we're printing for CIL to parse back in. the purpose is to
hide things from gcc that it complains about, but still be able
to do lossless transformations when CIL is the consumer *)
let forgcc (s: string) : string =
if (!print_CIL_Input) then "" else s
let debugConstFold = false
(** The Abstract Syntax of CIL *)
(** The top-level representation of a CIL source file. Its main contents is
the list of global declarations and definitions. *)
type file =
{ mutable fileName: string; (** The complete file name *)
mutable globals: global list; (** List of globals as they will appear
in the printed file *)
mutable globinit: fundec option;
(** An optional global initializer function. This is a function where
you can put stuff that must be executed before the program is
started. This function, is conceptually at the end of the file,
although it is not part of the globals list. Use {!getGlobInit}
to create/get one. *)
mutable globinitcalled: bool;
(** Whether the global initialization function is called in main. This
should always be false if there is no global initializer. When
you create a global initialization CIL will try to insert code in
main to call it. *)
}
and comment = location * string
(** The main type for representing global declarations and definitions. A list
of these form a CIL file. The order of globals in the file is generally
important. *)
and global =
| GType of typeinfo * location
(** A typedef. All uses of type names (through the [TNamed] constructor)
must be preceded in the file by a definition of the name. The string
is the defined name and always not-empty. *)
| GCompTag of compinfo * location
(** Defines a struct/union tag with some fields. There must be one of
these for each struct/union tag that you use (through the [TComp]
constructor) since this is the only context in which the fields are
printed. Consequently nested structure tag definitions must be
broken into individual definitions with the innermost structure
defined first. *)
| GCompTagDecl of compinfo * location
(** Declares a struct/union tag. Use as a forward declaration. This is
printed without the fields. *)
| GEnumTag of enuminfo * location
(** Declares an enumeration tag with some fields. There must be one of
these for each enumeration tag that you use (through the [TEnum]
constructor) since this is the only context in which the items are
printed. *)
| GEnumTagDecl of enuminfo * location
(** Declares an enumeration tag. Use as a forward declaration. This is
printed without the items. *)
| GVarDecl of varinfo * location
(** A variable declaration (not a definition). If the variable has a
function type then this is a prototype. There can be several
declarations and at most one definition for a given variable in C, but
in CIL there is also only at most one declaration per variable. If both
forms appear then they must share the same varinfo structure. A
prototype shares the varinfo with the fundec of the definition. Either
has storage Extern or there must be a definition in this file *)
| GVar of varinfo * initinfo * location
(** A variable definition. Can have an initializer. The initializer is
updateable so that you can change it without requiring to recreate
the list of globals. There can be at most one definition for a
variable in an entire program. Cannot have storage Extern or function
type. *)
| GFun of fundec * location
(** A function definition. *)
| GAsm of string * location (** Global asm statement. These ones
can contain only a template *)
| GPragma of attribute * location (** Pragmas at top level. Use the same
syntax as attributes *)
| GText of string (** Some text (printed verbatim) at
top level. E.g., this way you can
put comments in the output. *)
(** The various types available. Every type is associated with a list of
attributes, which are always kept in sorted order. Use {!addAttribute}
and {!addAttributes} to construct list of attributes. If you want to
inspect a type, you should use {!unrollType} to see through the uses
of named types. *)
and typ =
TVoid of attributes (** Void type *)
| TInt of ikind * attributes (** An integer type. The kind specifies
the sign and width. *)
| TFloat of fkind * attributes (** A floating-point type. The kind
specifies the precision. *)
| TPtr of typ * attributes
(** Pointer type. *)
| TArray of typ * exp option * attributes
(** Array type. It indicates the base type and the array length. *)
| TFun of typ * (string * typ * attributes) list option * bool * attributes
(** Function type. Indicates the type of the result, the name, type
and name attributes of the formal arguments ([None] if no
arguments were specified, as in a function whose definition or
prototype we have not seen; [Some \[\]] means void). Use
{!argsToList} to obtain a list of arguments. The boolean
indicates if it is a variable-argument function. If this is the
type of a varinfo for which we have a function declaration then
the information for the formals must match that in the
function's sformals. *)
| TNamed of typeinfo * attributes
(** The use of a named type. All uses of the same type name must
share the typeinfo. Each such type name must be preceded
in the file by a [GType] global. This is printed as just the
type name. The actual referred type is not printed here and is
carried only to simplify processing. To see through a sequence
of named type references, use {!unrollType}. The attributes
are in addition to those given when the type name was defined. *)
| TComp of compinfo * attributes
(** A reference to a struct or a union type. All references to the
same struct or union must share the same compinfo among them and
with a [GCompTag] global that precedes all uses (except maybe
those that are pointers to the composite type). The attributes
given are those pertaining to this use of the type and are in
addition to the attributes that were given at the definition of
the type and which are stored in the compinfo. *)
| TEnum of enuminfo * attributes
(** A reference to an enumeration type. All such references must
share the enuminfo among them and with a [GEnumTag] global that
precedes all uses. The attributes refer to this use of the
enumeration and are in addition to the attributes of the
enumeration itself, which are stored inside the enuminfo *)
| TBuiltin_va_list of attributes
(** This is the same as the gcc's type with the same name *)
(** Various kinds of integers *)
and ikind =
IChar (** [char] *)
| ISChar (** [signed char] *)
| IUChar (** [unsigned char] *)
| IBool (** [_Bool (C99)] *)
| IInt (** [int] *)
| IUInt (** [unsigned int] *)
| IShort (** [short] *)
| IUShort (** [unsigned short] *)
| ILong (** [long] *)
| IULong (** [unsigned long] *)
| ILongLong (** [long long] (or [_int64] on Microsoft Visual C) *)
| IULongLong (** [unsigned long long] (or [unsigned _int64] on Microsoft
Visual C) *)
| IInt128 (** [__int128] *)
| IUInt128 (** [unsigned __int128] *)
(** Various kinds of floating-point numbers*)
and fkind =
FFloat (** [float] *)
| FDouble (** [double] *)
| FLongDouble (** [long double] *)
| FComplexFloat (** [float _Complex] *)
| FComplexDouble (** [double _Complex] *)
| FComplexLongDouble (** [long double _Complex]*)
(** An attribute has a name and some optional parameters *)
and attribute = Attr of string * attrparam list
(** Attributes are lists sorted by the attribute name *)
and attributes = attribute list
(** The type of parameters in attributes *)
and attrparam =
| AInt of int (** An integer constant *)
| AStr of string (** A string constant *)
| ACons of string * attrparam list (** Constructed attributes. These
are printed [foo(a1,a2,...,an)].
The list of parameters can be
empty and in that case the
parentheses are not printed. *)
| ASizeOf of typ (** A way to talk about types *)
| ASizeOfE of attrparam
| ASizeOfS of typsig (** Replacement for ASizeOf in type
signatures. Only used for
attributes inside typsigs.*)
| AAlignOf of typ
| AAlignOfE of attrparam
| AAlignOfS of typsig
| AUnOp of unop * attrparam
| ABinOp of binop * attrparam * attrparam
| ADot of attrparam * string (** a.foo **)
| AStar of attrparam (** * a *)
| AAddrOf of attrparam (** & a **)
| AIndex of attrparam * attrparam (** a1[a2] *)
| AQuestion of attrparam * attrparam * attrparam (** a1 ? a2 : a3 **)
(** Information about a composite type (a struct or a union). Use
{!mkCompInfo}
to create non-recursive or (potentially) recursive versions of this. Make
sure you have a [GCompTag] for each one of these. *)
and compinfo = {
mutable cstruct: bool; (** True if struct, False if union *)
mutable cname: string; (** The name. Always non-empty. Use
{!compFullName} to get the
full name of a comp (along with
the struct or union) *)
mutable ckey: int; (** A unique integer constructed from
the name. Use {!Hashtbl.hash} on
the string returned by
{!compFullName}. All compinfo
for a given key are shared. *)
mutable cfields: fieldinfo list; (** Information about the fields *)
mutable cattr: attributes; (** The attributes that are defined at
the same time as the composite
type *)
mutable cdefined: bool; (** Whether this is a defined
compinfo. *)
mutable creferenced: bool; (** True if used. Initially set to
false *)
}
(** Information about a struct/union field *)
and fieldinfo = {
mutable fcomp: compinfo; (** The compinfo of the host. Note
that this must be shared with the
host since there can be only one
compinfo for a given id *)
mutable fname: string; (** The name of the field. Might be
the value of
{!missingFieldName} in which
case it must be a bitfield and is
not printed and it does not
participate in initialization *)
mutable ftype: typ; (** The type *)
mutable fbitfield: int option; (** If a bitfield then ftype should be
an integer type *)
mutable fattr: attributes; (** The attributes for this field
(not for its type) *)
mutable floc: location; (** The location where this field
is defined *)
}
(** Information about an enumeration. This is shared by all references to an
enumeration. Make sure you have a [GEnumTag] for each of of these. *)
and enuminfo = {
mutable ename: string; (** The name. Always non-empty *)
mutable eitems: (string * exp * location) list; (** Items with names
and values. This list
should be
non-empty. The item
values must be
compile-time
constants. *)
mutable eattr: attributes; (** Attributes *)
mutable ereferenced: bool; (** True if used. Initially set to false*)
mutable ekind: ikind;
(** The integer kind used to represent this enum. Per ANSI-C, this
should always be IInt, but gcc allows other integer kinds *)
}
(** Information about a defined type *)
and typeinfo = {
mutable tname: string;
(** The name. Can be empty only in a [GType] when introducing a composite
or enumeration tag. If empty cannot be referred to from the file *)
mutable ttype: typ;
(** The actual type. *)
mutable treferenced: bool;
(** True if used. Initially set to false*)
}
(** Information about a variable. These structures are shared by all
references to the variable. So, you can change the name easily, for
example. Use one of the {!makeLocalVar}, {!makeTempVar} or
{!makeGlobalVar} to create instances of this data structure. *)
and varinfo = {
mutable vname: string; (** The name of the variable. Cannot
be empty. *)
mutable vtype: typ; (** The declared type of the
variable. *)
mutable vattr: attributes; (** A list of attributes associated
with the variable. *)
mutable vstorage: storage; (** The storage-class *)
(* The other fields are not used in varinfo when they appear in the formal
argument list in a [TFun] type *)
mutable vglob: bool; (** True if this is a global variable*)
mutable vinline: bool; (** Whether this varinfo is for an inline function. *)
mutable vdecl: location; (** Location of variable declaration *)
vinit: initinfo;
(** Optional initializer. Only used for static and global variables.
Initializers for other types of local variables are turned into
assignments. *)
mutable vid: int; (** A unique integer identifier. *)
mutable vaddrof: bool; (** True if the address of this
variable is taken. CIL will set
these flags when it parses C, but
you should make sure to set the
flag whenever your transformation
create [AddrOf] expression. *)
mutable vreferenced: bool; (** True if this variable is ever
referenced. This is computed by
[removeUnusedVars]. It is safe to
just initialize this to False *)
mutable vdescr: doc; (** For most temporary variables, a
description of what the var holds.
(e.g. for temporaries used for
function call results, this string
is a representation of the function
call.) *)
mutable vdescrpure: bool; (** Indicates whether the vdescr above
is a pure expression or call.
True for all CIL expressions and
Lvals, but false for e.g. function
calls.
Printing a non-pure vdescr more
than once may yield incorrect
results. *)
mutable vhasdeclinstruction: bool; (** Indicates whether a VarDecl instruction
was generated for this variable.
Only applies to local variables.
Currently, this is relevant for when to
print the declaration. If this is
true, it might be incorrect to print the
declaration at the beginning of the
function, rather than were the VarDecl
instruction is. This was originally
introduced to handle VLAs. *)
}
(** Storage-class information *)
and storage =
NoStorage (** The default storage. Nothing is
printed *)
| Static
| Register
| Extern
(** Expressions (Side-effect free)*)
and exp =
Const of constant (** Constant *)
| Lval of lval (** Lvalue *)
| SizeOf of typ (** sizeof(<type>). Has [unsigned
int] type (ISO 6.5.3.4). This is
not turned into a constant because
some transformations might want to
change types *)
| Real of exp (** __real__(<expression>) *)
| Imag of exp (** __imag__(<expression>) *)
| SizeOfE of exp (** sizeof(<expression>) *)
| SizeOfStr of string
(** sizeof(string_literal). We separate this case out because this is the
only instance in which a string literal should not be treated as
having type pointer to character. *)
| AlignOf of typ (** Has [unsigned int] type *)
| AlignOfE of exp
| UnOp of unop * exp * typ (** Unary operation. Includes
the type of the result *)
| BinOp of binop * exp * exp * typ
(** Binary operation. Includes the
type of the result. The arithmetic
conversions are made explicit
for the arguments *)
| Question of exp * exp * exp * typ
(** (a ? b : c) operation. Includes
the type of the result *)
| CastE of typ * exp (** Use {!mkCast} to make casts *)
| AddrOf of lval (** Always use {!mkAddrOf} to
construct one of these. Apply to an
lvalue of type [T] yields an
expression of type [TPtr(T)] *)
| AddrOfLabel of stmt ref
| StartOf of lval (** There is no C correspondent for this. C has
implicit coercions from an array to the address
of the first element. [StartOf] is used in CIL to
simplify type checking and is just an explicit
form of the above mentioned implicit conversion.
It is not printed. Given an lval of type
[TArray(T)] produces an expression of type
[TPtr(T)]. *)
and wstring_type = | Wchar_t | Char16_t | Char32_t
and encoding = No_encoding | Utf8
(** Literal constants *)
and constant =
| CInt of cilint * ikind * string option
(** Integer constant. Give the ikind (see ISO9899 6.1.3.2)
and the textual representation, if available. Use
{!integer} or {!kinteger} to create these. *)
| CStr of string * encoding (** String constant (of pointer type) *)
| CWStr of int64 list * wstring_type (** Wide string constant (of type "wchar_t *") *)
| CChr of char (** Character constant. This has type int, so use
charConstToInt to read the value in case
sign-extension is needed. *)
| CReal of float * fkind * string option (** Floating point constant. Give
the fkind (see ISO 6.4.4.2) and
also the textual representation,
if available *)
| CEnum of exp * string * enuminfo
(** An enumeration constant with the given value, name, from the given
enuminfo. This is not used if {!lowerEnum} is false (default).
Use {!Cillower.lowerEnumVisitor} to replace these with integer
constants. *)
(** Unary operators *)
and unop =
Neg (** Unary minus *)
| BNot (** Bitwise complement (~) *)
| LNot (** Logical Not (!) *)
(** Binary operations *)
and binop =
PlusA (** arithmetic + *)
| PlusPI (** pointer + integer *)
| IndexPI (** pointer + integer but only when
it arises from an expression
[e\[i\]] when [e] is a pointer and
not an array. This is semantically
the same as PlusPI but CCured uses
this as a hint that the integer is
probably positive. *)
| MinusA (** arithmetic - *)
| MinusPI (** pointer - integer *)
| MinusPP (** pointer - pointer *)
| Mult (** * *)
| Div (** / *)
| Mod (** % *)
| Shiftlt (** shift left *)
| Shiftrt (** shift right *)
| Lt (** < (arithmetic comparison) *)
| Gt (** > (arithmetic comparison) *)
| Le (** <= (arithmetic comparison) *)
| Ge (** > (arithmetic comparison) *)
| Eq (** == (arithmetic comparison) *)
| Ne (** != (arithmetic comparison) *)
| BAnd (** bitwise and *)
| BXor (** exclusive-or *)
| BOr (** inclusive-or *)
| LAnd (** logical and *)
| LOr (** logical or *)
(** An lvalue denotes the contents of a range of memory addresses. This range
is denoted as a host object along with an offset within the object. The
host object can be of two kinds: a local or global variable, or an object
whose address is in a pointer expression. We distinguish the two cases so
that we can tell quickly whether we are accessing some component of a
variable directly or we are accessing a memory location through a pointer.*)
and lval =
lhost * offset
(** The host part of an {!lval}. *)
and lhost =
| Var of varinfo
(** The host is a variable. *)
| Mem of exp
(** The host is an object of type [T] when the expression has pointer
[TPtr(T)]. *)
(** The offset part of an {!lval}. Each offset can be applied to certain
kinds of lvalues and its effect is that it advances the starting address
of the lvalue and changes the denoted type, essentially focussing to some
smaller lvalue that is contained in the original one. *)
and offset =
| NoOffset (** No offset. Can be applied to any lvalue and does
not change either the starting address or the type.
This is used when the lval consists of just a host
or as a terminator in a list of other kinds of
offsets. *)
| Field of fieldinfo * offset
(** A field offset. Can be applied only to an lvalue
that denotes a structure or a union that contains
the mentioned field. This advances the offset to the
beginning of the mentioned field and changes the
type to the type of the mentioned field. *)
| Index of exp * offset
(** An array index offset. Can be applied only to an
lvalue that denotes an array. This advances the
starting address of the lval to the beginning of the
mentioned array element and changes the denoted type
to be the type of the array element *)
(* The following equivalences hold *)
(* Mem(AddrOf(Mem a, aoff)), off = Mem a, aoff + off *)
(* Mem(AddrOf(Var v, aoff)), off = Var v, aoff + off *)
(* AddrOf (Mem a, NoOffset) = a *)
(** Initializers for global variables. You can create an initializer with
{!makeZeroInit}. *)
and init =
| SingleInit of exp (** A single initializer *)
| CompoundInit of typ * (offset * init) list
(** Used only for initializers of structures, unions and arrays.
The offsets are all of the form [Field(f, NoOffset)] or
[Index(i, NoOffset)] and specify the field or the index being
initialized. For structures all fields
must have an initializer (except the unnamed bitfields), in
the proper order. This is necessary since the offsets are not
printed. For arrays the list must contain a prefix of the
initializers; the rest are 0-initialized.
For unions there must be exactly one initializer. If
the initializer is not for the first field then a field
designator is printed. You can scan an initializer list with
{!foldLeftCompound}. *)
(** We want to be able to update an initializer in a global variable, so we
define it as a mutable field *)
and initinfo = {
mutable init : init option;
}
(** Function definitions. *)
and fundec =
{ mutable svar: varinfo;
(** Holds the name and type as a variable, so we can refer to it
easily from the program. All references to this function either
in a function call or in a prototype must point to the same
varinfo. *)
mutable sformals: varinfo list;
(** Formals. These must be shared with the formals that appear in the
type of the function. Use {!setFormals} or
{!setFunctionType} to set these
formals and ensure that they are reflected in the function type.
Do not make copies of these because the body refers to them. *)
mutable slocals: varinfo list;
(** Locals. Does not include the sformals. Do not make copies of
these because the body refers to them. *)
mutable smaxid: int; (** Max local id. Starts at 0. *)
mutable sbody: block; (** The function body. *)
mutable smaxstmtid: int option; (** max id of a (reachable) statement
in this function, if we have
computed it. range = 0 ...
(smaxstmtid-1). This is computed by
{!computeCFGInfo}. *)
mutable sallstmts: stmt list; (** After you call {!computeCFGInfo}
this field is set to contain all
statements in the function *)
}
(** A block is a sequence of statements with the control falling through from
one element to the next *)
and block =
{ mutable battrs: attributes; (** Attributes for the block *)
mutable bstmts: stmt list; (** The statements comprising the block*)
}
(** Statements.
The statement is the structural unit in the control flow graph. Use mkStmt
to make a statement and then fill in the fields. *)
and stmt = {
mutable labels: label list; (** Whether the statement starts with
some labels, case statements or
default statement *)
mutable skind: stmtkind; (** The kind of statement *)
(* Now some additional control flow information. Initially this is not
filled in. *)
mutable sid: int; (** A number (>= 0) that is unique
in a function. *)
mutable succs: stmt list; (** The successor statements. They can
always be computed from the skind
and the context in which this
statement appears *)
mutable preds: stmt list; (** The inverse of the succs function*)
mutable fallthrough: stmt option; (** The fallthrough successor statement computed from the context of this statement in {!computeCFGInto}. Useful for the syntactic successor of Goto and Loop. *)
}
(** Labels *)
and label =
Label of string * location * bool
(** A real label. If the bool is "true", the label is from the
input source program. If the bool is "false", the label was
created by CIL or some other transformation *)
| Case of exp * location * location (** A case statement. Second location is just for label. *)
| CaseRange of exp * exp * location * location (** A case statement corresponding to a
range of values. Second location is just for label. *)
| Default of location * location (** A default statement. Second location is just for label. *)
(* The various kinds of statements *)
and stmtkind =
| Instr of instr list (** A group of instructions that do not
contain control flow. Control
implicitly falls through. *)
| Return of exp option * location (** The return statement. This is a
leaf in the CFG. *)
| Goto of stmt ref * location (** A goto statement. Appears from
actual goto's in the code. *)
| ComputedGoto of exp * location
| Break of location (** A break to the end of the nearest
enclosing Loop or Switch *)
| Continue of location (** A continue to the start of the
nearest enclosing [Loop] *)
| If of exp * block * block * location * location (** A conditional.
Two successors, the "then" and
the "else" branches. Both
branches fall-through to the
successor of the If statement.
Second location is just for expression. *)
| Switch of exp * block * (stmt list) * location * location
(** A switch statement. The block
contains within all of the cases.
We also have direct pointers to the
statements that implement the
cases. Which cases they implement
you can get from the labels of the
statement.
Second location is just for expression. *)
| Loop of block * location * location * (stmt option) * (stmt option)
(** A [while(1)] loop. The
termination test is implemented
in the body of a loop using a
[Break] statement. If
prepareCFG has been called, the
first stmt option will point to
the stmt containing the
continue label for this loop
and the second will point to
the stmt containing the break
label for this loop.
Second location is just for expression. *)
| Block of block (** Just a block of statements. Use it
as a way to keep some attributes
local *)
(** Instructions. They may cause effects directly but may not have control
flow.*)
and instr =
Set of lval * exp * location * location (** An assignment. A cast is present
if the exp has different type
from lval.
Second location is just for expression when inside condition. *)
| VarDecl of varinfo * location (** "Instruction" in the location where a varinfo was declared.
All varinfos for which such a VarDecl instruction exists have
vhasdeclinstruction set to true.
The motivation for the addition of this instruction was to
support VLAs for which declarations can not be pulled up like
CIL used to do. *)
| Call of lval option * exp * exp list * location * location
(** optional: result is an lval. A cast might be
necessary if the declared result type of the
function is not the same as that of the
destination. If the function is declared then
casts are inserted for those arguments that
correspond to declared formals. (The actual
number of arguments might be smaller or larger
than the declared number of arguments. C allows
this.) If the type of the result variable is not
the same as the declared type of the function
result then an implicit cast exists.
Second location is just for expression when inside condition. *)
(* See the GCC specification for the meaning of ASM.
If the source is MS VC then only the templates
are used *)
(* sm: I've added a notes.txt file which contains more
information on interpreting Asm instructions *)
| Asm of attributes * (* Really only const and volatile can appear
here *)
string list * (* templates (CR-separated) *)
(string option * string * lval) list *
(* outputs must be lvals with
optional names and constraints.
I would like these
to be actually variables, but I
run into some trouble with ASMs
in the Linux sources *)
(string option * string * exp) list *
(* inputs with optional names and constraints *)
string list * (* register clobbers *)
location
(** An inline assembly instruction. The arguments are (1) a list of
attributes (only const and volatile can appear here and only for
GCC), (2) templates (CR-separated), (3) a list of
outputs, each of which is an lvalue with a constraint, (4) a list
of input expressions along with constraints, (5) clobbered
registers, and (5) location information *)
(** Describes a location in a source file *)
and location = {
line: int; (** The line number. -1 means "do not know" *)
file: string; (** The name of the source file*)
byte: int; (** The byte position in the source file *)
column: int; (** The column number *)
endLine: int; (** End line number. Negative means unknown. *)
endByte: int; (** End byte position. Negative means unknown. *)
endColumn: int; (** End column number. Negative means unknown. *)
synthetic: bool; (** Synthetic location, doesn't necessarily precisely correspond to a location in original source code, e.g. due to CIL transformations.
@see <https://github.com/goblint/cil/pull/98> for some examples. *)
}
(* Type signatures. Two types are identical iff they have identical
signatures *)
and typsig =
TSArray of typsig * cilint option * attribute list
| TSPtr of typsig * attribute list
| TSComp of bool * string * attribute list
| TSFun of typsig * typsig list option * bool * attribute list
| TSEnum of string * attribute list
| TSBase of typ
let locUnknown = { line = -1;
file = "";
byte = -1;
column = -1;
endLine = -1;
endByte = -1;
endColumn = -1;
synthetic = true;}
(* A reference to the current location *)
let currentLoc : location ref = ref locUnknown
(* A reference to the current expression location *)
let currentExpLoc : location ref = ref locUnknown
(* A reference to the current global being visited *)
let currentGlobal: global ref = ref (GText "dummy")
let compareLoc (a: location) (b: location) : int =
let namecmp = compare a.file b.file in
if namecmp != 0
then namecmp
else
let linecmp = a.line - b.line in
if linecmp != 0
then linecmp
else
let columncmp = a.column - b.column in
if columncmp != 0
then columncmp
else
let bytecmp = a.byte - b.byte in
if bytecmp != 0
then bytecmp
else
let endLinecmp = a.endLine - b.endLine in
if endLinecmp != 0
then endLinecmp
else
let endColumncmp = a.endColumn - b.endColumn in
if endColumncmp != 0
then endColumncmp
else a.endByte - b.endByte
let argsToList : (string * typ * attributes) list option
-> (string * typ * attributes) list
= function
None -> []
| Some al -> al
(* A hack to allow forward reference of d_exp *)
let pd_exp : (unit -> exp -> doc) ref =
ref (fun _ -> E.s (E.bug "pd_exp not initialized"))
let pd_type : (unit -> typ -> doc) ref =
ref (fun _ -> E.s (E.bug "pd_type not initialized"))
let pd_attr : (unit -> attribute -> doc) ref =
ref (fun _ -> E.s (E.bug "pd_attr not initialized"))
(** Different visiting actions. 'a will be instantiated with [exp], [instr],
etc. *)
type 'a visitAction =
SkipChildren (** Do not visit the children. Return
the node as it is. *)
| DoChildren (** Continue with the children of this
node. Rebuild the node on return
if any of the children changes
(use == test) *)
| ChangeTo of 'a (** Replace the expression with the
given one *)
| ChangeDoChildrenPost of 'a * ('a -> 'a) (** First consider that the entire
exp is replaced by the first
parameter. Then continue with
the children. On return rebuild
the node if any of the children
has changed and then apply the
function on the node *)
(* sm/gn: cil visitor interface for traversing Cil trees. *)
(* Use visitCilStmt and/or visitCilFile to use this. *)
(* Some of the nodes are changed in place if the children are changed. Use
one of Change... actions if you want to copy the node *)
(** A visitor interface for traversing CIL trees. Create instantiations of
this type by specializing the class {!nopCilVisitor}. *)
class type cilVisitor = object
method vvdec: varinfo -> varinfo visitAction
(** Invoked for each variable declaration. The subtrees to be traversed
are those corresponding to the type and attributes of the variable.
Note that variable declarations are all the [GVar], [GVarDecl], [GFun],
all the [varinfo] in formals of function types, and the formals and
locals for function definitions. This means that the list of formals
in a function definition will be traversed twice, once as part of the
function type and second as part of the formals in a function
definition. *)
method vvrbl: varinfo -> varinfo visitAction
(** Invoked on each variable use. Here only the [SkipChildren] and
[ChangeTo] actions make sense since there are no subtrees. Note that
the type and attributes of the variable are not traversed for a
variable use *)
method vexpr: exp -> exp visitAction
(** Invoked on each expression occurrence. The subtrees are the
subexpressions, the types (for a [Cast] or [SizeOf] expression) or the
variable use. *)
method vlval: lval -> lval visitAction