@@ -10,9 +10,14 @@ Author: Daniel Kroening, kroening@kroening.com
1010
1111#include  < util/std_expr.h> 
1212#include  < util/prefix.h> 
13+ #include  < util/arith_tools.h> 
14+ #include  < util/unicode.h> 
15+ 
16+ #include  < linking/zero_initializer.h> 
1317
1418#include  " java_bytecode_typecheck.h" 
1519#include  " java_pointer_casts.h" 
20+ #include  " java_types.h" 
1621
1722/* ******************************************************************\
1823
@@ -114,6 +119,27 @@ static std::string escape_non_alnum(const std::string &toescape)
114119
115120/* ******************************************************************\
116121
122+ Function: utf16_to_array 
123+ 
124+   Inputs: `in`: wide string to convert 
125+ 
126+  Outputs: Returns a Java char array containing the same wchars. 
127+ 
128+  Purpose: Convert UCS-2 or UTF-16 to an array expression. 
129+ 
130+ \*******************************************************************/  
131+ 
132+ static  array_exprt utf16_to_array (const  std::wstring &in)
133+ {
134+   const  auto  jchar=java_char_type ();
135+   array_exprt ret (array_typet (jchar, infinity_exprt (java_int_type ())));
136+   for (const  auto  c : in)
137+     ret.copy_to_operands (from_integer (c, jchar));
138+   return  ret;
139+ }
140+ 
141+ /* ******************************************************************\
142+ 
117143Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal 
118144
119145  Inputs: 
@@ -136,28 +162,106 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
136162  auto  findit=symbol_table.symbols .find (escaped_symbol_name);
137163  if (findit!=symbol_table.symbols .end ())
138164  {
139-     expr=findit->second .symbol_expr ();
165+     expr=address_of_exprt ( findit->second .symbol_expr () );
140166    return ;
141167  }
142168
143169  //  Create a new symbol:
144170  symbolt new_symbol;
145171  new_symbol.name =escaped_symbol_name;
146-   new_symbol.type =pointer_typet ( string_type) ;
172+   new_symbol.type =string_type;
147173  new_symbol.base_name =" Literal"  ;
148174  new_symbol.pretty_name =value;
149175  new_symbol.mode =ID_java;
150176  new_symbol.is_type =false ;
151177  new_symbol.is_lvalue =true ;
152178  new_symbol.is_static_lifetime =true ; //  These are basically const global data.
153179
180+   //  Regardless of string refinement setting, at least initialize
181+   //  the literal with @clsid = String and @lock = false:
182+   symbol_typet jlo_symbol (" java::java.lang.Object"  );
183+   const  auto  &jlo_struct=to_struct_type (ns.follow (jlo_symbol));
184+   struct_exprt jlo_init (jlo_symbol);
185+   const  auto  &jls_struct=to_struct_type (ns.follow (string_type));
186+ 
187+   jlo_init.copy_to_operands (
188+     constant_exprt (
189+       " java::java.lang.String"  ,
190+       jlo_struct.components ()[0 ].type ()));
191+   jlo_init.copy_to_operands (
192+     from_integer (
193+       0 ,
194+       jlo_struct.components ()[1 ].type ()));
195+ 
196+   //  If string refinement *is* around, populate the actual
197+   //  contents as well:
198+   if (string_refinement_enabled)
199+   {
200+     struct_exprt literal_init (new_symbol.type );
201+     literal_init.move_to_operands (jlo_init);
202+ 
203+     //  Initialize the string with a constant utf-16 array:
204+     symbolt array_symbol;
205+     array_symbol.name =escaped_symbol_name+" _constarray"  ;
206+     array_symbol.type =array_typet (
207+       java_char_type (), infinity_exprt (java_int_type ()));
208+     array_symbol.base_name =" Literal_constarray"  ;
209+     array_symbol.pretty_name =value;
210+     array_symbol.mode =ID_java;
211+     array_symbol.is_type =false ;
212+     array_symbol.is_lvalue =true ;
213+     //  These are basically const global data:
214+     array_symbol.is_static_lifetime =true ;
215+     array_symbol.is_state_var =true ;
216+     auto  literal_array=utf16_to_array (
217+       utf8_to_utf16_little_endian (id2string (value)));
218+     array_symbol.value =literal_array;
219+ 
220+     if (symbol_table.add (array_symbol))
221+       throw  " failed to add constarray symbol to symbol table"  ;
222+ 
223+     literal_init.copy_to_operands (
224+       from_integer (literal_array.operands ().size (),
225+                    jls_struct.components ()[1 ].type ()));
226+     literal_init.copy_to_operands (
227+       address_of_exprt (array_symbol.symbol_expr ()));
228+ 
229+     new_symbol.value =literal_init;
230+   }
231+   else  if (jls_struct.components ().size ()>=1  &&
232+           jls_struct.components ()[0 ].get_name ()==" @java.lang.Object"  )
233+   {
234+     //  Case where something defined java.lang.String, so it has
235+     //  a proper base class (always java.lang.Object in practical
236+     //  JDKs seen so far)
237+     struct_exprt literal_init (new_symbol.type );
238+     literal_init.move_to_operands (jlo_init);
239+     for (const  auto  &comp : jls_struct.components ())
240+     {
241+       if (comp.get_name ()==" @java.lang.Object"  )
242+         continue ;
243+       //  Other members of JDK's java.lang.String we don't understand
244+       //  without string-refinement. Just zero-init them; consider using
245+       //  test-gen-like nondet object trees instead.
246+       literal_init.copy_to_operands (
247+         zero_initializer (comp.type (), expr.source_location (), ns));
248+     }
249+     new_symbol.value =literal_init;
250+   }
251+   else  if (jls_struct.get_bool (ID_incomplete_class))
252+   {
253+     //  Case where java.lang.String was stubbed, and so directly defines
254+     //  @class_identifier and @lock:
255+     new_symbol.value =jlo_init;
256+   }
257+ 
154258  if (symbol_table.add (new_symbol))
155259  {
156260    error () << " failed to add string literal symbol to symbol table"   << eom;
157261    throw  0 ;
158262  }
159263
160-   expr=new_symbol.symbol_expr ();
264+   expr=address_of_exprt ( new_symbol.symbol_expr () );
161265}
162266
163267/* ******************************************************************\
0 commit comments