cprover
Loading...
Searching...
No Matches
java_bytecode_parser.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
10
11#include <algorithm>
12#include <fstream>
13#include <map>
14#include <string>
15
16#include <util/arith_tools.h>
17#include <util/ieee_float.h>
18#include <util/parser.h>
19#include <util/std_expr.h>
21#include <util/optional.h>
22
23#include "bytecode_info.h"
26#include "java_types.h"
27
28class java_bytecode_parsert final : public parsert
29{
30public:
33 {
34 }
35
36 bool parse() override;
37
39 {
40 u1 tag = 0;
41 u2 ref1 = 0;
42 u2 ref2 = 0;
46 };
47
49
50private:
59
60 using constant_poolt = std::vector<pool_entryt>;
61
63
64 const bool skip_instructions = false;
65
67 {
68 if(index==0 || index>=constant_pool.size())
69 {
70 error() << "invalid constant pool index (" << index << ")" << eom;
71 error() << "constant pool size: " << constant_pool.size() << eom;
72 throw 0;
73 }
74
75 return constant_pool[index];
76 }
77
79 {
80 return pool_entry(index).expr;
81 }
82
83 const typet type_entry(u2 index)
84 {
86 }
87
88 void rClassFile();
89 void rconstant_pool();
90 void rinterfaces();
91 void rfields();
92 void rmethods();
93 void rmethod();
94 void rinner_classes_attribute(const u4 &attribute_length);
95 std::vector<irep_idt> rexceptions_attribute();
96 void rclass_attribute();
97 void rRuntimeAnnotation_attribute(std::vector<annotationt> &);
101 void rmethod_attribute(methodt &method);
102 void rfield_attribute(fieldt &);
103 void rcode_attribute(methodt &method);
104 void read_verification_type_info(methodt::verification_type_infot &);
105 void rbytecode(std::vector<instructiont> &);
106 void get_class_refs();
107 void get_class_refs_rec(const typet &);
108 void get_annotation_class_refs(const std::vector<annotationt> &annotations);
109 void get_annotation_value_class_refs(const exprt &value);
112 parse_method_handle(const class method_handle_infot &entry);
114
115 void skip_bytes(std::size_t bytes)
116 {
117 for(std::size_t i=0; i<bytes; i++)
118 {
119 if(!*in)
120 {
121 error() << "unexpected end of bytecode file" << eom;
122 throw 0;
123 }
124 in->get();
125 }
126 }
127
128 template <typename T>
130 {
131 static_assert(
132 std::is_unsigned<T>::value, "T should be an unsigned integer");
133 const constexpr size_t bytes = sizeof(T);
134 u8 result = 0;
135 for(size_t i = 0; i < bytes; i++)
136 {
137 if(!*in)
138 {
139 error() << "unexpected end of bytecode file" << eom;
140 throw 0;
141 }
142 result <<= 8u;
143 result |= static_cast<u1>(in->get());
144 }
145 return narrow_cast<T>(result);
146 }
147
148 void store_unknown_method_handle(size_t bootstrap_method_index);
149};
150
151#define CONSTANT_Class 7
152#define CONSTANT_Fieldref 9
153#define CONSTANT_Methodref 10
154#define CONSTANT_InterfaceMethodref 11
155#define CONSTANT_String 8
156#define CONSTANT_Integer 3
157#define CONSTANT_Float 4
158#define CONSTANT_Long 5
159#define CONSTANT_Double 6
160#define CONSTANT_NameAndType 12
161#define CONSTANT_Utf8 1
162#define CONSTANT_MethodHandle 15
163#define CONSTANT_MethodType 16
164#define CONSTANT_InvokeDynamic 18
165
166#define VTYPE_INFO_TOP 0
167#define VTYPE_INFO_INTEGER 1
168#define VTYPE_INFO_FLOAT 2
169#define VTYPE_INFO_LONG 3
170#define VTYPE_INFO_DOUBLE 4
171#define VTYPE_INFO_ITEM_NULL 5
172#define VTYPE_INFO_UNINIT_THIS 6
173#define VTYPE_INFO_OBJECT 7
174#define VTYPE_INFO_UNINIT 8
175
177{
178public:
180 using pool_entry_lookupt = std::function<pool_entryt &(u2)>;
181
182 explicit structured_pool_entryt(const pool_entryt &entry) : tag(entry.tag)
183 {
184 }
185
186 u1 get_tag() const
187 {
188 return tag;
189 }
190
191protected:
192 static std::string read_utf8_constant(const pool_entryt &entry)
193 {
194 INVARIANT(
195 entry.tag == CONSTANT_Utf8, "Name entry must be a constant UTF-8");
196 return id2string(entry.s);
197 }
198
199private:
201};
202
206{
207public:
208 explicit class_infot(const pool_entryt &entry) : structured_pool_entryt(entry)
209 {
211 name_index = entry.ref1;
212 }
213
214 std::string get_name(const pool_entry_lookupt &pool_entry) const
215 {
216 const pool_entryt &name_entry = pool_entry(name_index);
217 return read_utf8_constant(name_entry);
218 }
219
220private:
222};
223
227{
228public:
229 explicit name_and_type_infot(const pool_entryt &entry)
231 {
233 name_index = entry.ref1;
234 descriptor_index = entry.ref2;
235 }
236
237 std::string get_name(const pool_entry_lookupt &pool_entry) const
238 {
239 const pool_entryt &name_entry = pool_entry(name_index);
240 return read_utf8_constant(name_entry);
241 }
242
243 std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
244 {
245 const pool_entryt &descriptor_entry = pool_entry(descriptor_index);
246 return read_utf8_constant(descriptor_entry);
247 }
248
249private:
252};
253
255{
256public:
257 explicit base_ref_infot(const pool_entryt &entry)
259 {
261 entry.tag == CONSTANT_Fieldref || entry.tag == CONSTANT_Methodref ||
263 class_index = entry.ref1;
265 }
266
268 {
269 return class_index;
270 }
272 {
273 return name_and_type_index;
274 }
275
277 get_name_and_type(const pool_entry_lookupt &pool_entry) const
278 {
279 const pool_entryt &name_and_type_entry = pool_entry(name_and_type_index);
280
281 INVARIANT(
282 name_and_type_entry.tag == CONSTANT_NameAndType,
283 "name_and_typeindex did not correspond to a name_and_type in the "
284 "constant pool");
285
286 return name_and_type_infot{name_and_type_entry};
287 }
288
289 class_infot get_class(const pool_entry_lookupt &pool_entry) const
290 {
291 const pool_entryt &class_entry = pool_entry(class_index);
292
293 return class_infot{class_entry};
294 }
295
296private:
299};
300
302{
303public:
308 {
309 REF_getField = 1,
310 REF_getStatic = 2,
311 REF_putField = 3,
312 REF_putStatic = 4,
318 };
319
320 explicit method_handle_infot(const pool_entryt &entry)
322 {
324 PRECONDITION(entry.ref1 > 0 && entry.ref1 < 10); // Java 8 spec 4.4.8
325 handle_kind = static_cast<method_handle_kindt>(entry.ref1);
326 reference_index = entry.ref2;
327 }
328
330 {
331 return handle_kind;
332 }
333
335 {
336 const base_ref_infot ref_entry{pool_entry(reference_index)};
337
338 // validate the correctness of the constant pool entry
339 switch(handle_kind)
340 {
345 {
346 INVARIANT(ref_entry.get_tag() == CONSTANT_Fieldref, "4.4.2");
347 break;
348 }
351 {
352 INVARIANT(ref_entry.get_tag() == CONSTANT_Methodref, "4.4.2");
353 break;
354 }
357 {
358 INVARIANT(
359 ref_entry.get_tag() == CONSTANT_Methodref ||
360 ref_entry.get_tag() == CONSTANT_InterfaceMethodref,
361 "4.4.2");
362 break;
363 }
365 {
366 INVARIANT(ref_entry.get_tag() == CONSTANT_InterfaceMethodref, "4.4.8");
367 break;
368 }
369 }
370
371 return ref_entry;
372 }
373
374private:
377};
378
380{
381 try
382 {
383 rClassFile();
384 }
385
386 catch(const char *message)
387 {
388 error() << message << eom;
389 return true;
390 }
391
392 catch(const std::string &message)
393 {
394 error() << message << eom;
395 return true;
396 }
397
398 catch(...)
399 {
400 error() << "parsing error" << eom;
401 return true;
402 }
403
404 return false;
405}
406
407#define ACC_PUBLIC 0x0001u
408#define ACC_PRIVATE 0x0002u
409#define ACC_PROTECTED 0x0004u
410#define ACC_STATIC 0x0008u
411#define ACC_FINAL 0x0010u
412#define ACC_SYNCHRONIZED 0x0020u
413#define ACC_BRIDGE 0x0040u
414#define ACC_NATIVE 0x0100u
415#define ACC_INTERFACE 0x0200u
416#define ACC_ABSTRACT 0x0400u
417#define ACC_STRICT 0x0800u
418#define ACC_SYNTHETIC 0x1000u
419#define ACC_ANNOTATION 0x2000u
420#define ACC_ENUM 0x4000u
421
422#define UNUSED_u2(x) \
423 { \
424 const u2 x = read<u2>(); \
425 (void)x; \
426 } \
427 (void)0
428
430{
432
433 const u4 magic = read<u4>();
434 UNUSED_u2(minor_version);
435 const u2 major_version = read<u2>();
436
437 if(magic!=0xCAFEBABE)
438 {
439 error() << "wrong magic" << eom;
440 throw 0;
441 }
442
443 if(major_version<44)
444 {
445 error() << "unexpected major version" << eom;
446 throw 0;
447 }
448
450
451 classt &parsed_class=parse_tree.parsed_class;
452
453 const u2 access_flags = read<u2>();
454 const u2 this_class = read<u2>();
455 const u2 super_class = read<u2>();
456
457 parsed_class.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
458 parsed_class.is_enum=(access_flags&ACC_ENUM)!=0;
459 parsed_class.is_public=(access_flags&ACC_PUBLIC)!=0;
460 parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
461 parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
462 parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
463 parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
464 parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
465 parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
466 parsed_class.name=
467 constant(this_class).type().get(ID_C_base_name);
468
469 if(super_class!=0)
470 parsed_class.super_class = constant(super_class).type().get(ID_C_base_name);
471
472 rinterfaces();
473 rfields();
474 rmethods();
475
476 // count elements of enum
477 if(parsed_class.is_enum)
479 if(field.is_enum)
481
482 const u2 attributes_count = read<u2>();
483
484 for(std::size_t j=0; j<attributes_count; j++)
486
488
490}
491
494{
495 for(const auto &c : constant_pool)
496 {
497 switch(c.tag)
498 {
499 case CONSTANT_Class:
500 get_class_refs_rec(c.expr.type());
501 break;
502
506 break;
507
508 default: {}
509 }
510 }
511
513
514 for(const auto &field : parse_tree.parsed_class.fields)
515 {
516 get_annotation_class_refs(field.annotations);
517
518 if(field.signature.has_value())
519 {
521 field.descriptor,
522 field.signature,
524
525 // add generic type args to class refs as dependencies, same below for
526 // method types and entries from the local variable type table
528 field_type, parse_tree.class_refs);
529 get_class_refs_rec(field_type);
530 }
531 else
532 {
533 get_class_refs_rec(*java_type_from_string(field.descriptor));
534 }
535 }
536
537 for(const auto &method : parse_tree.parsed_class.methods)
538 {
539 get_annotation_class_refs(method.annotations);
540 for(const auto &parameter_annotations : method.parameter_annotations)
541 get_annotation_class_refs(parameter_annotations);
542
543 if(method.signature.has_value())
544 {
546 method.descriptor,
547 method.signature,
550 method_type, parse_tree.class_refs);
551 get_class_refs_rec(method_type);
552 }
553 else
554 {
555 get_class_refs_rec(*java_type_from_string(method.descriptor));
556 }
557
558 for(const auto &var : method.local_variable_table)
559 {
560 if(var.signature.has_value())
561 {
563 var.descriptor,
564 var.signature,
567 var_type, parse_tree.class_refs);
568 get_class_refs_rec(var_type);
569 }
570 else
571 {
573 }
574 }
575 }
576}
577
579{
580 if(src.id()==ID_code)
581 {
582 const java_method_typet &ct = to_java_method_type(src);
583 const typet &rt=ct.return_type();
585 for(const auto &p : ct.parameters())
586 get_class_refs_rec(p.type());
587 }
588 else if(src.id() == ID_struct_tag)
589 {
590 const struct_tag_typet &struct_tag_type = to_struct_tag_type(src);
591 if(is_java_array_tag(struct_tag_type.get_identifier()))
593 else
594 parse_tree.class_refs.insert(src.get(ID_C_base_name));
595 }
596 else if(src.id()==ID_struct)
597 {
598 const struct_typet &struct_type=to_struct_type(src);
599 for(const auto &c : struct_type.components())
600 get_class_refs_rec(c.type());
601 }
602 else if(src.id()==ID_pointer)
604}
605
609 const std::vector<annotationt> &annotations)
610{
611 for(const auto &annotation : annotations)
612 {
613 get_class_refs_rec(annotation.type);
614 for(const auto &element_value_pair : annotation.element_value_pairs)
615 get_annotation_value_class_refs(element_value_pair.value);
616 }
617}
618
623{
624 if(const auto &symbol_expr = expr_try_dynamic_cast<symbol_exprt>(value))
625 {
626 const irep_idt &value_id = symbol_expr->get_identifier();
628 }
629 else if(const auto &array_expr = expr_try_dynamic_cast<array_exprt>(value))
630 {
631 for(const exprt &operand : array_expr->operands())
633 }
634 // TODO: enum and nested annotation cases (once these are correctly parsed by
635 // get_relement_value).
636 // Note that in the cases where expr is a string or primitive type, no
637 // additional class references are needed.
638}
639
641{
642 const u2 constant_pool_count = read<u2>();
643 if(constant_pool_count==0)
644 {
645 error() << "invalid constant_pool_count" << eom;
646 throw 0;
647 }
648
649 constant_pool.resize(constant_pool_count);
650
651 for(auto it = std::next(constant_pool.begin()); it != constant_pool.end();
652 it++)
653 {
654 it->tag = read<u1>();
655
656 switch(it->tag)
657 {
658 case CONSTANT_Class:
659 it->ref1 = read<u2>();
660 break;
661
667 it->ref1 = read<u2>();
668 it->ref2 = read<u2>();
669 break;
670
671 case CONSTANT_String:
673 it->ref1 = read<u2>();
674 break;
675
676 case CONSTANT_Integer:
677 case CONSTANT_Float:
678 it->number = read<u4>();
679 break;
680
681 case CONSTANT_Long:
682 case CONSTANT_Double:
683 it->number = read<u8>();
684 // Eight-byte constants take up two entries in the constant_pool table.
685 if(it==constant_pool.end())
686 {
687 error() << "invalid double entry" << eom;
688 throw 0;
689 }
690 it++;
691 it->tag=0;
692 break;
693
694 case CONSTANT_Utf8:
695 {
696 const u2 bytes = read<u2>();
697 std::string s;
698 s.resize(bytes);
699 for(auto &ch : s)
700 ch = read<u1>();
701 it->s = s; // Add to string table
702 }
703 break;
704
706 it->ref1 = read<u1>();
707 it->ref2 = read<u2>();
708 break;
709
710 default:
711 error() << "unknown constant pool entry (" << it->tag << ")"
712 << eom;
713 throw 0;
714 }
715 }
716
717 // we do a bit of post-processing after we have them all
718 std::for_each(
719 std::next(constant_pool.begin()),
720 constant_pool.end(),
721 [&](constant_poolt::value_type &entry) {
722 switch(entry.tag)
723 {
724 case CONSTANT_Class:
725 {
726 const std::string &s = id2string(pool_entry(entry.ref1).s);
727 entry.expr = type_exprt(java_classname(s));
728 }
729 break;
730
731 case CONSTANT_Fieldref:
732 {
733 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
734 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
735 const pool_entryt &class_entry = pool_entry(entry.ref1);
736 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
737 typet type=type_entry(nameandtype_entry.ref2);
738
739 auto class_tag = java_classname(id2string(class_name_entry.s));
740
741 fieldref_exprt fieldref(type, name_entry.s, class_tag.get_identifier());
742
743 entry.expr = fieldref;
744 }
745 break;
746
747 case CONSTANT_Methodref:
748 case CONSTANT_InterfaceMethodref:
749 {
750 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
751 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
752 const pool_entryt &class_entry = pool_entry(entry.ref1);
753 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
754 typet type=type_entry(nameandtype_entry.ref2);
755
756 auto class_tag = java_classname(id2string(class_name_entry.s));
757
758 irep_idt mangled_method_name =
759 id2string(name_entry.s) + ":" +
760 id2string(pool_entry(nameandtype_entry.ref2).s);
761
762 irep_idt class_id = class_tag.get_identifier();
763
764 entry.expr = class_method_descriptor_exprt{
765 type, mangled_method_name, class_id, name_entry.s};
766 }
767 break;
768
769 case CONSTANT_String:
770 {
771 // ldc turns these into references to java.lang.String
772 entry.expr = java_string_literal_exprt{pool_entry(entry.ref1).s};
773 }
774 break;
775
776 case CONSTANT_Integer:
777 entry.expr = from_integer(entry.number, java_int_type());
778 break;
779
780 case CONSTANT_Float:
781 {
782 ieee_floatt value(ieee_float_spect::single_precision());
783 value.unpack(entry.number);
784 entry.expr = value.to_expr();
785 }
786 break;
787
788 case CONSTANT_Long:
789 entry.expr = from_integer(entry.number, java_long_type());
790 break;
791
792 case CONSTANT_Double:
793 {
794 ieee_floatt value(ieee_float_spect::double_precision());
795 value.unpack(entry.number);
796 entry.expr = value.to_expr();
797 }
798 break;
799
800 case CONSTANT_NameAndType:
801 {
802 entry.expr.id("nameandtype");
803 }
804 break;
805
806 case CONSTANT_MethodHandle:
807 {
808 entry.expr.id("methodhandle");
809 }
810 break;
811
812 case CONSTANT_MethodType:
813 {
814 entry.expr.id("methodtype");
815 }
816 break;
817
818 case CONSTANT_InvokeDynamic:
819 {
820 entry.expr.id("invokedynamic");
821 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
822 typet type=type_entry(nameandtype_entry.ref2);
823 type.set(ID_java_lambda_method_handle_index, entry.ref1);
824 entry.expr.type() = type;
825 }
826 break;
827 }
828 });
829}
830
832{
833 const u2 interfaces_count = read<u2>();
834
835 for(std::size_t i=0; i<interfaces_count; i++)
837 constant(read<u2>()).type().get(ID_C_base_name));
838}
839
841{
842 const u2 fields_count = read<u2>();
843
844 for(std::size_t i=0; i<fields_count; i++)
845 {
847
848 const u2 access_flags = read<u2>();
849 const u2 name_index = read<u2>();
850 const u2 descriptor_index = read<u2>();
851 const u2 attributes_count = read<u2>();
852
853 field.name=pool_entry(name_index).s;
854 field.is_static=(access_flags&ACC_STATIC)!=0;
855 field.is_final=(access_flags&ACC_FINAL)!=0;
856 field.is_enum=(access_flags&ACC_ENUM)!=0;
857
858 field.descriptor=id2string(pool_entry(descriptor_index).s);
859 field.is_public=(access_flags&ACC_PUBLIC)!=0;
860 field.is_protected=(access_flags&ACC_PROTECTED)!=0;
861 field.is_private=(access_flags&ACC_PRIVATE)!=0;
862 const auto flags = (field.is_public ? 1 : 0) +
863 (field.is_protected?1:0)+
864 (field.is_private?1:0);
865 DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
866
867 for(std::size_t j=0; j<attributes_count; j++)
868 rfield_attribute(field);
869 }
870}
871
872#define T_BOOLEAN 4
873#define T_CHAR 5
874#define T_FLOAT 6
875#define T_DOUBLE 7
876#define T_BYTE 8
877#define T_SHORT 9
878#define T_INT 10
879#define T_LONG 11
880
881void java_bytecode_parsert::rbytecode(std::vector<instructiont> &instructions)
882{
883 const u4 code_length = read<u4>();
884
885 u4 address;
886 size_t bytecode_index=0; // index of bytecode instruction
887
888 for(address=0; address<code_length; address++)
889 {
890 bool wide_instruction=false;
891 u4 start_of_instruction=address;
892
893 u1 bytecode = read<u1>();
894
895 if(bytecode == BC_wide)
896 {
897 wide_instruction=true;
898 address++;
899 bytecode = read<u1>();
900 // The only valid instructions following a wide byte are
901 // [ifald]load, [ifald]store, ret and iinc
902 // All of these have either format of v, or V
903 INVARIANT(
904 bytecode_info[bytecode].format == 'v' ||
905 bytecode_info[bytecode].format == 'V',
906 std::string("Unexpected wide instruction: ") +
907 bytecode_info[bytecode].mnemonic);
908 }
909
910 instructions.emplace_back();
911 instructiont &instruction=instructions.back();
912 instruction.bytecode = bytecode;
913 instruction.address=start_of_instruction;
914 instruction.source_location
915 .set_java_bytecode_index(std::to_string(bytecode_index));
916
917 switch(bytecode_info[bytecode].format)
918 {
919 case ' ': // no further bytes
920 break;
921
922 case 'c': // a constant_pool index (one byte)
923 if(wide_instruction)
924 {
925 instruction.args.push_back(constant(read<u2>()));
926 address+=2;
927 }
928 else
929 {
930 instruction.args.push_back(constant(read<u1>()));
931 address+=1;
932 }
933 break;
934
935 case 'C': // a constant_pool index (two bytes)
936 instruction.args.push_back(constant(read<u2>()));
937 address+=2;
938 break;
939
940 case 'b': // a signed byte
941 {
942 const s1 c = read<u1>();
943 instruction.args.push_back(from_integer(c, signedbv_typet(8)));
944 }
945 address+=1;
946 break;
947
948 case 'o': // two byte branch offset, signed
949 {
950 const s2 offset = read<u2>();
951 // By converting the signed offset into an absolute address (by adding
952 // the current address) the number represented becomes unsigned.
953 instruction.args.push_back(
954 from_integer(address+offset, unsignedbv_typet(16)));
955 }
956 address+=2;
957 break;
958
959 case 'O': // four byte branch offset, signed
960 {
961 const s4 offset = read<u4>();
962 // By converting the signed offset into an absolute address (by adding
963 // the current address) the number represented becomes unsigned.
964 instruction.args.push_back(
965 from_integer(address+offset, unsignedbv_typet(32)));
966 }
967 address+=4;
968 break;
969
970 case 'v': // local variable index (one byte)
971 {
972 if(wide_instruction)
973 {
974 const u2 v = read<u2>();
975 instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
976 address += 2;
977 }
978 else
979 {
980 const u1 v = read<u1>();
981 instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
982 address += 1;
983 }
984 }
985
986 break;
987
988 case 'V':
989 // local variable index (two bytes) plus two signed bytes
990 if(wide_instruction)
991 {
992 const u2 v = read<u2>();
993 instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
994 const s2 c = read<u2>();
995 instruction.args.push_back(from_integer(c, signedbv_typet(16)));
996 address+=4;
997 }
998 else // local variable index (one byte) plus one signed byte
999 {
1000 const u1 v = read<u1>();
1001 instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1002 const s1 c = read<u1>();
1003 instruction.args.push_back(from_integer(c, signedbv_typet(8)));
1004 address+=2;
1005 }
1006 break;
1007
1008 case 'I': // two byte constant_pool index plus two bytes
1009 {
1010 const u2 c = read<u2>();
1011 instruction.args.push_back(constant(c));
1012 const u1 b1 = read<u1>();
1013 instruction.args.push_back(from_integer(b1, unsignedbv_typet(8)));
1014 const u1 b2 = read<u1>();
1015 instruction.args.push_back(from_integer(b2, unsignedbv_typet(8)));
1016 }
1017 address+=4;
1018 break;
1019
1020 case 'L': // lookupswitch
1021 {
1022 u4 base_offset=address;
1023
1024 // first a pad to 32-bit align
1025 while(((address + 1u) & 3u) != 0)
1026 {
1027 read<u1>();
1028 address++;
1029 }
1030
1031 // now default value
1032 const s4 default_value = read<u4>();
1033 // By converting the signed offset into an absolute address (by adding
1034 // the current address) the number represented becomes unsigned.
1035 instruction.args.push_back(
1036 from_integer(base_offset+default_value, unsignedbv_typet(32)));
1037 address+=4;
1038
1039 // number of pairs
1040 const u4 npairs = read<u4>();
1041 address+=4;
1042
1043 for(std::size_t i=0; i<npairs; i++)
1044 {
1045 const s4 match = read<u4>();
1046 const s4 offset = read<u4>();
1047 instruction.args.push_back(
1048 from_integer(match, signedbv_typet(32)));
1049 // By converting the signed offset into an absolute address (by adding
1050 // the current address) the number represented becomes unsigned.
1051 instruction.args.push_back(
1052 from_integer(base_offset+offset, unsignedbv_typet(32)));
1053 address+=8;
1054 }
1055 }
1056 break;
1057
1058 case 'T': // tableswitch
1059 {
1060 size_t base_offset=address;
1061
1062 // first a pad to 32-bit align
1063 while(((address + 1u) & 3u) != 0)
1064 {
1065 read<u1>();
1066 address++;
1067 }
1068
1069 // now default value
1070 const s4 default_value = read<u4>();
1071 instruction.args.push_back(
1072 from_integer(base_offset+default_value, signedbv_typet(32)));
1073 address+=4;
1074
1075 // now low value
1076 const s4 low_value = read<u4>();
1077 address+=4;
1078
1079 // now high value
1080 const s4 high_value = read<u4>();
1081 address+=4;
1082
1083 // there are high-low+1 offsets, and they are signed
1084 for(s4 i=low_value; i<=high_value; i++)
1085 {
1086 s4 offset = read<u4>();
1087 instruction.args.push_back(from_integer(i, signedbv_typet(32)));
1088 // By converting the signed offset into an absolute address (by adding
1089 // the current address) the number represented becomes unsigned.
1090 instruction.args.push_back(
1091 from_integer(base_offset+offset, unsignedbv_typet(32)));
1092 address+=4;
1093 }
1094 }
1095 break;
1096
1097 case 'm': // multianewarray: constant-pool index plus one unsigned byte
1098 {
1099 const u2 c = read<u2>(); // constant-pool index
1100 instruction.args.push_back(constant(c));
1101 const u1 dimensions = read<u1>(); // number of dimensions
1102 instruction.args.push_back(
1103 from_integer(dimensions, unsignedbv_typet(8)));
1104 address+=3;
1105 }
1106 break;
1107
1108 case 't': // array subtype, one byte
1109 {
1110 typet t;
1111 switch(read<u1>())
1112 {
1113 case T_BOOLEAN: t.id(ID_bool); break;
1114 case T_CHAR: t.id(ID_char); break;
1115 case T_FLOAT: t.id(ID_float); break;
1116 case T_DOUBLE: t.id(ID_double); break;
1117 case T_BYTE: t.id(ID_byte); break;
1118 case T_SHORT: t.id(ID_short); break;
1119 case T_INT: t.id(ID_int); break;
1120 case T_LONG: t.id(ID_long); break;
1121 default:{};
1122 }
1123 instruction.args.push_back(type_exprt(t));
1124 }
1125 address+=1;
1126 break;
1127
1128 case 's': // a signed short
1129 {
1130 const s2 s = read<u2>();
1131 instruction.args.push_back(from_integer(s, signedbv_typet(16)));
1132 }
1133 address+=2;
1134 break;
1135
1136 default:
1137 throw "unknown JVM bytecode instruction";
1138 }
1139 bytecode_index++;
1140 }
1141
1142 if(address!=code_length)
1143 {
1144 error() << "bytecode length mismatch" << eom;
1145 throw 0;
1146 }
1147}
1148
1150{
1151 const u2 attribute_name_index = read<u2>();
1152 const u4 attribute_length = read<u4>();
1153
1154 irep_idt attribute_name=pool_entry(attribute_name_index).s;
1155
1156 if(attribute_name == "Code")
1157 {
1158 UNUSED_u2(max_stack);
1159 UNUSED_u2(max_locals);
1160
1162 skip_bytes(read<u4>());
1163 else
1164 rbytecode(method.instructions);
1165
1166 const u2 exception_table_length = read<u2>();
1168 skip_bytes(exception_table_length * 8u);
1169 else
1170 {
1171 method.exception_table.resize(exception_table_length);
1172
1173 for(std::size_t e = 0; e < exception_table_length; e++)
1174 {
1175 const u2 start_pc = read<u2>();
1176 const u2 end_pc = read<u2>();
1177
1178 // From the class file format spec ("4.7.3. The Code Attribute" for
1179 // Java8)
1180 INVARIANT(
1181 start_pc < end_pc,
1182 "The start_pc must be less than the end_pc as this is the range the "
1183 "exception is active");
1184
1185 const u2 handler_pc = read<u2>();
1186 const u2 catch_type = read<u2>();
1187 method.exception_table[e].start_pc = start_pc;
1188 method.exception_table[e].end_pc = end_pc;
1189 method.exception_table[e].handler_pc = handler_pc;
1190 if(catch_type != 0)
1191 method.exception_table[e].catch_type =
1192 to_struct_tag_type(pool_entry(catch_type).expr.type());
1193 }
1194 }
1195
1196 u2 attributes_count = read<u2>();
1197
1198 for(std::size_t j=0; j<attributes_count; j++)
1199 rcode_attribute(method);
1200
1201 // method name
1203 "java::" + id2string(parse_tree.parsed_class.name) + "." +
1204 id2string(method.name) + ":" + method.descriptor);
1205
1206 irep_idt line_number;
1207
1208 // add missing line numbers
1209 for(auto &instruction : method.instructions)
1210 {
1211 if(!instruction.source_location.get_line().empty())
1212 line_number = instruction.source_location.get_line();
1213 else if(!line_number.empty())
1214 instruction.source_location.set_line(line_number);
1215 instruction.source_location.set_function(
1216 method.source_location.get_function());
1217 }
1218
1219 // line number of method (the first line number available)
1220 const auto it = std::find_if(
1221 method.instructions.begin(),
1222 method.instructions.end(),
1223 [&](const instructiont &instruction) {
1224 return !instruction.source_location.get_line().empty();
1225 });
1226 if(it != method.instructions.end())
1227 method.source_location.set_line(it->source_location.get_line());
1228 }
1229 else if(attribute_name=="Signature")
1230 {
1231 const u2 signature_index = read<u2>();
1232 method.signature=id2string(pool_entry(signature_index).s);
1233 }
1234 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1235 attribute_name=="RuntimeVisibleAnnotations")
1236 {
1238 }
1239 else if(
1240 attribute_name == "RuntimeInvisibleParameterAnnotations" ||
1241 attribute_name == "RuntimeVisibleParameterAnnotations")
1242 {
1243 const u1 parameter_count = read<u1>();
1244 // There may be attributes for both runtime-visible and runtime-invisible
1245 // annotations, the length of either array may be longer than the other as
1246 // trailing parameters without annotations are omitted.
1247 // Extend our parameter_annotations if this one is longer than the one
1248 // previously recorded (if any).
1249 if(method.parameter_annotations.size() < parameter_count)
1250 method.parameter_annotations.resize(parameter_count);
1251 for(u2 param_no = 0; param_no < parameter_count; ++param_no)
1253 }
1254 else if(attribute_name == "Exceptions")
1255 {
1257 }
1258 else
1259 skip_bytes(attribute_length);
1260}
1261
1263{
1264 const u2 attribute_name_index = read<u2>();
1265 const u4 attribute_length = read<u4>();
1266
1267 irep_idt attribute_name=pool_entry(attribute_name_index).s;
1268
1269 if(attribute_name=="Signature")
1270 {
1271 const u2 signature_index = read<u2>();
1272 field.signature=id2string(pool_entry(signature_index).s);
1273 }
1274 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1275 attribute_name=="RuntimeVisibleAnnotations")
1276 {
1278 }
1279 else
1280 skip_bytes(attribute_length);
1281}
1282
1284{
1285 const u2 attribute_name_index = read<u2>();
1286 const u4 attribute_length = read<u4>();
1287
1288 irep_idt attribute_name=pool_entry(attribute_name_index).s;
1289
1290 if(attribute_name=="LineNumberTable")
1291 {
1292 std::map<unsigned, std::reference_wrapper<instructiont>> instruction_map;
1293 for(auto &instruction : method.instructions)
1294 instruction_map.emplace(instruction.address, instruction);
1295
1296 const u2 line_number_table_length = read<u2>();
1297
1298 for(std::size_t i=0; i<line_number_table_length; i++)
1299 {
1300 const u2 start_pc = read<u2>();
1301 const u2 line_number = read<u2>();
1302
1303 // annotate the bytecode program
1304 auto it = instruction_map.find(start_pc);
1305
1306 if(it!=instruction_map.end())
1307 it->second.get().source_location.set_line(line_number);
1308 }
1309 }
1310 else if(attribute_name=="LocalVariableTable")
1311 {
1312 const u2 local_variable_table_length = read<u2>();
1313
1314 method.local_variable_table.resize(local_variable_table_length);
1315
1316 for(std::size_t i=0; i<local_variable_table_length; i++)
1317 {
1318 const u2 start_pc = read<u2>();
1319 const u2 length = read<u2>();
1320 const u2 name_index = read<u2>();
1321 const u2 descriptor_index = read<u2>();
1322 const u2 index = read<u2>();
1323
1324 method.local_variable_table[i].index=index;
1325 method.local_variable_table[i].name=pool_entry(name_index).s;
1326 method.local_variable_table[i].descriptor=
1327 id2string(pool_entry(descriptor_index).s);
1328 method.local_variable_table[i].start_pc=start_pc;
1329 method.local_variable_table[i].length=length;
1330 }
1331 }
1332 else if(attribute_name=="LocalVariableTypeTable")
1333 {
1335 }
1336 else if(attribute_name=="StackMapTable")
1337 {
1338 const u2 stack_map_entries = read<u2>();
1339
1340 method.stack_map_table.resize(stack_map_entries);
1341
1342 for(size_t i=0; i<stack_map_entries; i++)
1343 {
1344 const u1 frame_type = read<u1>();
1345 if(frame_type<=63)
1346 {
1347 method.stack_map_table[i].type=methodt::stack_map_table_entryt::SAME;
1348 method.stack_map_table[i].locals.resize(0);
1349 method.stack_map_table[i].stack.resize(0);
1350 }
1351 else if(64<=frame_type && frame_type<=127)
1352 {
1353 method.stack_map_table[i].type=
1354 methodt::stack_map_table_entryt::SAME_LOCALS_ONE_STACK;
1355 method.stack_map_table[i].locals.resize(0);
1356 method.stack_map_table[i].stack.resize(1);
1357 methodt::verification_type_infot verification_type_info;
1358 read_verification_type_info(verification_type_info);
1359 method.stack_map_table[i].stack[0]=verification_type_info;
1360 }
1361 else if(frame_type==247)
1362 {
1363 method.stack_map_table[i].type=
1364 methodt::stack_map_table_entryt::SAME_LOCALS_ONE_STACK_EXTENDED;
1365 method.stack_map_table[i].locals.resize(0);
1366 method.stack_map_table[i].stack.resize(1);
1367 methodt::verification_type_infot verification_type_info;
1368 const u2 offset_delta = read<u2>();
1369 read_verification_type_info(verification_type_info);
1370 method.stack_map_table[i].stack[0]=verification_type_info;
1371 method.stack_map_table[i].offset_delta=offset_delta;
1372 }
1373 else if(248<=frame_type && frame_type<=250)
1374 {
1375 method.stack_map_table[i].type=methodt::stack_map_table_entryt::CHOP;
1376 method.stack_map_table[i].locals.resize(0);
1377 method.stack_map_table[i].stack.resize(0);
1378 const u2 offset_delta = read<u2>();
1379 method.stack_map_table[i].offset_delta=offset_delta;
1380 }
1381 else if(frame_type==251)
1382 {
1383 method.stack_map_table[i].type
1384 =methodt::stack_map_table_entryt::SAME_EXTENDED;
1385 method.stack_map_table[i].locals.resize(0);
1386 method.stack_map_table[i].stack.resize(0);
1387 const u2 offset_delta = read<u2>();
1388 method.stack_map_table[i].offset_delta=offset_delta;
1389 }
1390 else if(252<=frame_type && frame_type<=254)
1391 {
1392 size_t new_locals = frame_type - 251;
1393 method.stack_map_table[i].type=methodt::stack_map_table_entryt::APPEND;
1394 method.stack_map_table[i].locals.resize(new_locals);
1395 method.stack_map_table[i].stack.resize(0);
1396 const u2 offset_delta = read<u2>();
1397 method.stack_map_table[i].offset_delta=offset_delta;
1398 for(size_t k=0; k<new_locals; k++)
1399 {
1400 method.stack_map_table[i].locals
1403 method.stack_map_table[i].locals.back();
1405 }
1406 }
1407 else if(frame_type==255)
1408 {
1409 method.stack_map_table[i].type=methodt::stack_map_table_entryt::FULL;
1410 const u2 offset_delta = read<u2>();
1411 method.stack_map_table[i].offset_delta=offset_delta;
1412 const u2 number_locals = read<u2>();
1413 method.stack_map_table[i].locals.resize(number_locals);
1414 for(size_t k=0; k<(size_t) number_locals; k++)
1415 {
1416 method.stack_map_table[i].locals
1419 method.stack_map_table[i].locals.back();
1421 }
1422 const u2 number_stack_items = read<u2>();
1423 method.stack_map_table[i].stack.resize(number_stack_items);
1424 for(size_t k=0; k<(size_t) number_stack_items; k++)
1425 {
1426 method.stack_map_table[i].stack
1429 method.stack_map_table[i].stack.back();
1431 }
1432 }
1433 else
1434 throw "error: unknown stack frame type encountered";
1435 }
1436 }
1437 else
1438 skip_bytes(attribute_length);
1439}
1440
1443{
1444 const u1 tag = read<u1>();
1445 switch(tag)
1446 {
1447 case VTYPE_INFO_TOP:
1448 v.type=methodt::verification_type_infot::TOP;
1449 break;
1450 case VTYPE_INFO_INTEGER:
1451 v.type=methodt::verification_type_infot::INTEGER;
1452 break;
1453 case VTYPE_INFO_FLOAT:
1454 v.type=methodt::verification_type_infot::FLOAT;
1455 break;
1456 case VTYPE_INFO_LONG:
1457 v.type=methodt::verification_type_infot::LONG;
1458 break;
1459 case VTYPE_INFO_DOUBLE:
1460 v.type=methodt::verification_type_infot::DOUBLE;
1461 break;
1463 v.type=methodt::verification_type_infot::ITEM_NULL;
1464 break;
1466 v.type=methodt::verification_type_infot::UNINITIALIZED_THIS;
1467 break;
1468 case VTYPE_INFO_OBJECT:
1469 v.type=methodt::verification_type_infot::OBJECT;
1470 v.cpool_index = read<u2>();
1471 break;
1472 case VTYPE_INFO_UNINIT:
1473 v.type=methodt::verification_type_infot::UNINITIALIZED;
1474 v.offset = read<u2>();
1475 break;
1476 default:
1477 throw "error: unknown verification type info encountered";
1478 }
1479}
1480
1482 std::vector<annotationt> &annotations)
1483{
1484 const u2 num_annotations = read<u2>();
1485
1486 for(u2 number=0; number<num_annotations; number++)
1487 {
1488 annotationt annotation;
1489 rRuntimeAnnotation(annotation);
1490 annotations.push_back(annotation);
1491 }
1492}
1493
1495 annotationt &annotation)
1496{
1497 const u2 type_index = read<u2>();
1498 annotation.type=type_entry(type_index);
1500}
1501
1503 annotationt::element_value_pairst &element_value_pairs)
1504{
1505 const u2 num_element_value_pairs = read<u2>();
1506 element_value_pairs.resize(num_element_value_pairs);
1507
1508 for(auto &element_value_pair : element_value_pairs)
1509 {
1510 const u2 element_name_index = read<u2>();
1511 element_value_pair.element_name=pool_entry(element_name_index).s;
1512 element_value_pair.value = get_relement_value();
1513 }
1514}
1515
1523{
1524 const u1 tag = read<u1>();
1525
1526 switch(tag)
1527 {
1528 case 'e':
1529 {
1530 UNUSED_u2(type_name_index);
1531 UNUSED_u2(const_name_index);
1532 // todo: enum
1533 return exprt();
1534 }
1535
1536 case 'c':
1537 {
1538 const u2 class_info_index = read<u2>();
1539 return symbol_exprt::typeless(pool_entry(class_info_index).s);
1540 }
1541
1542 case '@':
1543 {
1544 // TODO: return this wrapped in an exprt
1545 // another annotation, recursively
1546 annotationt annotation;
1547 rRuntimeAnnotation(annotation);
1548 return exprt();
1549 }
1550
1551 case '[':
1552 {
1553 const u2 num_values = read<u2>();
1554 exprt::operandst values;
1555 values.reserve(num_values);
1556 for(std::size_t i=0; i<num_values; i++)
1557 {
1558 values.push_back(get_relement_value());
1559 }
1560 return array_exprt(std::move(values), array_typet(typet(), exprt()));
1561 }
1562
1563 case 's':
1564 {
1565 const u2 const_value_index = read<u2>();
1566 return string_constantt(pool_entry(const_value_index).s);
1567 }
1568
1569 default:
1570 {
1571 const u2 const_value_index = read<u2>();
1572 return constant(const_value_index);
1573 }
1574 }
1575}
1576
1589 const u4 &attribute_length)
1590{
1591 classt &parsed_class = parse_tree.parsed_class;
1592 std::string name = parsed_class.name.c_str();
1593 const u2 number_of_classes = read<u2>();
1594 const u4 number_of_bytes_to_be_read = number_of_classes * 8 + 2;
1595 INVARIANT(
1596 number_of_bytes_to_be_read == attribute_length,
1597 "The number of bytes to be read for the InnerClasses attribute does not "
1598 "match the attribute length.");
1599
1600 const auto pool_entry_lambda = [this](u2 index) -> pool_entryt & {
1601 return pool_entry(index);
1602 };
1603 const auto remove_separator_char = [](std::string str, char ch) {
1604 str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1605 return str;
1606 };
1607
1608 for(int i = 0; i < number_of_classes; i++)
1609 {
1610 const u2 inner_class_info_index = read<u2>();
1611 const u2 outer_class_info_index = read<u2>();
1612 const u2 inner_name_index = read<u2>();
1613 const u2 inner_class_access_flags = read<u2>();
1614
1615 std::string inner_class_info_name =
1616 class_infot(pool_entry(inner_class_info_index))
1617 .get_name(pool_entry_lambda);
1618 bool is_private = (inner_class_access_flags & ACC_PRIVATE) != 0;
1619 bool is_public = (inner_class_access_flags & ACC_PUBLIC) != 0;
1620 bool is_protected = (inner_class_access_flags & ACC_PROTECTED) != 0;
1621 bool is_static = (inner_class_access_flags & ACC_STATIC) != 0;
1622
1623 // If the original parsed class name matches the inner class name,
1624 // the parsed class is an inner class, so overwrite the parsed class'
1625 // access information and mark it as an inner class.
1626 bool is_inner_class = remove_separator_char(id2string(parsed_class.name), '.') ==
1627 remove_separator_char(inner_class_info_name, '/');
1628 if(!is_inner_class)
1629 continue;
1630 parsed_class.is_inner_class = is_inner_class;
1631 parsed_class.is_static_class = is_static;
1632 // This is a marker that a class is anonymous.
1633 if(inner_name_index == 0)
1634 parsed_class.is_anonymous_class = true;
1635 else
1636 parsed_class.inner_name = pool_entry_lambda(inner_name_index).s;
1637 // Note that if outer_class_info_index == 0, the inner class is an anonymous
1638 // or local class, and is treated as private.
1639 if(outer_class_info_index == 0)
1640 {
1641 parsed_class.is_private = true;
1642 parsed_class.is_protected = false;
1643 parsed_class.is_public = false;
1644 }
1645 else
1646 {
1647 std::string outer_class_info_name =
1648 class_infot(pool_entry(outer_class_info_index))
1649 .get_name(pool_entry_lambda);
1650 parsed_class.outer_class =
1651 constant(outer_class_info_index).type().get(ID_C_base_name);
1652 parsed_class.is_private = is_private;
1653 parsed_class.is_protected = is_protected;
1654 parsed_class.is_public = is_public;
1655 }
1656 }
1657}
1658
1665{
1666 const u2 number_of_exceptions = read<u2>();
1667
1668 std::vector<irep_idt> exceptions;
1669 for(size_t i = 0; i < number_of_exceptions; i++)
1670 {
1671 const u2 exception_index_table = read<u2>();
1672 const irep_idt exception_name =
1673 constant(exception_index_table).type().get(ID_C_base_name);
1674 exceptions.push_back(exception_name);
1675 }
1676 return exceptions;
1677}
1678
1680{
1681 classt &parsed_class = parse_tree.parsed_class;
1682
1683 const u2 attribute_name_index = read<u2>();
1684 const u4 attribute_length = read<u4>();
1685
1686 irep_idt attribute_name=pool_entry(attribute_name_index).s;
1687
1688 if(attribute_name=="SourceFile")
1689 {
1690 const u2 sourcefile_index = read<u2>();
1691 irep_idt sourcefile_name;
1692
1693 const std::string &fqn(id2string(parsed_class.name));
1694 size_t last_index = fqn.find_last_of('.');
1695 if(last_index==std::string::npos)
1696 sourcefile_name=pool_entry(sourcefile_index).s;
1697 else
1698 {
1699 std::string package_name=fqn.substr(0, last_index+1);
1700 std::replace(package_name.begin(), package_name.end(), '.', '/');
1701 const std::string &full_file_name=
1702 package_name+id2string(pool_entry(sourcefile_index).s);
1703 sourcefile_name=full_file_name;
1704 }
1705
1706 for(auto &method : parsed_class.methods)
1707 {
1708 method.source_location.set_file(sourcefile_name);
1709 for(auto &instruction : method.instructions)
1710 {
1711 if(!instruction.source_location.get_line().empty())
1712 instruction.source_location.set_file(sourcefile_name);
1713 }
1714 }
1715 }
1716 else if(attribute_name=="Signature")
1717 {
1718 const u2 signature_index = read<u2>();
1719 parsed_class.signature=id2string(pool_entry(signature_index).s);
1721 parsed_class.signature.value(),
1723 }
1724 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1725 attribute_name=="RuntimeVisibleAnnotations")
1726 {
1728 }
1729 else if(attribute_name == "BootstrapMethods")
1730 {
1731 // for this attribute
1732 // cf. https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.23
1733 INVARIANT(
1734 !parsed_class.attribute_bootstrapmethods_read,
1735 "only one BootstrapMethods argument is allowed in a class file");
1736
1737 // mark as read in parsed class
1738 parsed_class.attribute_bootstrapmethods_read = true;
1740 }
1741 else if(attribute_name == "InnerClasses")
1742 {
1744 }
1745 else
1746 skip_bytes(attribute_length);
1747}
1748
1750{
1751 const u2 methods_count = read<u2>();
1752
1753 for(std::size_t j=0; j<methods_count; j++)
1754 rmethod();
1755}
1756
1757#define ACC_PUBLIC 0x0001u
1758#define ACC_PRIVATE 0x0002u
1759#define ACC_PROTECTED 0x0004u
1760#define ACC_STATIC 0x0008u
1761#define ACC_FINAL 0x0010u
1762#define ACC_VARARGS 0x0080u
1763#define ACC_SUPER 0x0020u
1764#define ACC_VOLATILE 0x0040u
1765#define ACC_TRANSIENT 0x0080u
1766#define ACC_INTERFACE 0x0200u
1767#define ACC_ABSTRACT 0x0400u
1768#define ACC_SYNTHETIC 0x1000u
1769#define ACC_ANNOTATION 0x2000u
1770#define ACC_ENUM 0x4000u
1771
1773{
1775
1776 const u2 access_flags = read<u2>();
1777 const u2 name_index = read<u2>();
1778 const u2 descriptor_index = read<u2>();
1779
1780 method.is_final=(access_flags&ACC_FINAL)!=0;
1781 method.is_static=(access_flags&ACC_STATIC)!=0;
1782 method.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
1783 method.is_public=(access_flags&ACC_PUBLIC)!=0;
1784 method.is_protected=(access_flags&ACC_PROTECTED)!=0;
1785 method.is_private=(access_flags&ACC_PRIVATE)!=0;
1786 method.is_synchronized=(access_flags&ACC_SYNCHRONIZED)!=0;
1787 method.is_native=(access_flags&ACC_NATIVE)!=0;
1788 method.is_bridge = (access_flags & ACC_BRIDGE) != 0;
1789 method.is_varargs = (access_flags & ACC_VARARGS) != 0;
1790 method.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
1791 method.name=pool_entry(name_index).s;
1792 method.base_name=pool_entry(name_index).s;
1793 method.descriptor=id2string(pool_entry(descriptor_index).s);
1794
1795 const auto flags = (method.is_public ? 1 : 0) +
1796 (method.is_protected?1:0)+
1797 (method.is_private?1:0);
1798 DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
1799 const u2 attributes_count = read<u2>();
1800
1801 for(std::size_t j=0; j<attributes_count; j++)
1802 rmethod_attribute(method);
1803}
1804
1806 std::istream &istream,
1807 const irep_idt &class_name,
1808 message_handlert &message_handler,
1809 bool skip_instructions)
1810{
1811 java_bytecode_parsert java_bytecode_parser(skip_instructions);
1812 java_bytecode_parser.in=&istream;
1813 java_bytecode_parser.set_message_handler(message_handler);
1814
1815 bool parser_result=java_bytecode_parser.parse();
1816
1817 if(
1818 parser_result ||
1819 java_bytecode_parser.parse_tree.parsed_class.name != class_name)
1820 {
1821 return {};
1822 }
1823
1824 return std::move(java_bytecode_parser.parse_tree);
1825}
1826
1828 const std::string &file,
1829 const irep_idt &class_name,
1830 message_handlert &message_handler,
1831 bool skip_instructions)
1832{
1833 std::ifstream in(file, std::ios::binary);
1834
1835 if(!in)
1836 {
1837 return {};
1838 }
1839
1840 return java_bytecode_parse(
1841 in, class_name, message_handler, skip_instructions);
1842}
1843
1848{
1849 const u2 local_variable_type_table_length = read<u2>();
1850
1851 INVARIANT(
1852 local_variable_type_table_length<=method.local_variable_table.size(),
1853 "Local variable type table cannot have more elements "
1854 "than the local variable table.");
1855 for(std::size_t i=0; i<local_variable_type_table_length; i++)
1856 {
1857 const u2 start_pc = read<u2>();
1858 const u2 length = read<u2>();
1859 const u2 name_index = read<u2>();
1860 const u2 signature_index = read<u2>();
1861 const u2 index = read<u2>();
1862
1863 bool found=false;
1864 for(auto &lvar : method.local_variable_table)
1865 {
1866 // compare to entry in LVT
1867 if(lvar.index==index &&
1868 lvar.name==pool_entry(name_index).s &&
1869 lvar.start_pc==start_pc &&
1870 lvar.length==length)
1871 {
1872 found=true;
1873 lvar.signature=id2string(pool_entry(signature_index).s);
1874 break;
1875 }
1876 }
1877 INVARIANT(
1878 found,
1879 "Entry in LocalVariableTypeTable must be present in LVT");
1880 }
1881}
1882
1891{
1892 switch(java_handle_kind)
1893 {
1906 default:
1908 }
1909}
1910
1918{
1919 const std::function<pool_entryt &(u2)> pool_entry_lambda =
1920 [this](u2 index) -> pool_entryt & { return pool_entry(index); };
1921
1922 const base_ref_infot &ref_entry = entry.get_reference(pool_entry_lambda);
1923
1924 const class_infot &class_entry = ref_entry.get_class(pool_entry_lambda);
1925 const name_and_type_infot &name_and_type =
1926 ref_entry.get_name_and_type(pool_entry_lambda);
1927
1928 // The following lambda kinds specified in the JVMS (see for example
1929 // https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-5.html#jvms-5.4.3.5
1930 // but which aren't actually created by javac. Java has no syntax for a field-
1931 // refernce like this, and even writing a stereotypical lambda like
1932 // Producer<FieldType> = instance -> instance.field does not generate this
1933 // kind of handle, but rather a synthetic method implementing the getfield
1934 // operation.
1935 // We don't implement a handle kind that can't be produced yet, but should
1936 // they ever turn up this is where to fix them.
1937
1938 if(
1939 entry.get_handle_kind() ==
1941 entry.get_handle_kind() ==
1943 entry.get_handle_kind() ==
1945 entry.get_handle_kind() ==
1947 {
1948 return {};
1949 }
1950
1951 irep_idt class_name =
1952 java_classname(class_entry.get_name(pool_entry_lambda)).get_identifier();
1953
1954 irep_idt method_name = name_and_type.get_name(pool_entry_lambda);
1955 std::string descriptor = name_and_type.get_descriptor(pool_entry_lambda);
1956 irep_idt mangled_method_name = id2string(method_name) + ":" + descriptor;
1957 typet method_type = *java_type_from_string(descriptor);
1958
1959 method_handle_typet handle_type =
1961
1962 class_method_descriptor_exprt method_descriptor{
1963 method_type, mangled_method_name, class_name, method_name};
1964 lambda_method_handlet lambda_method_handle{method_descriptor, handle_type};
1965
1966 return lambda_method_handle;
1967}
1968
1971{
1972 const u2 num_bootstrap_methods = read<u2>();
1973 for(size_t bootstrap_method_index = 0;
1974 bootstrap_method_index < num_bootstrap_methods;
1975 ++bootstrap_method_index)
1976 {
1977 const u2 bootstrap_methodhandle_ref = read<u2>();
1978 const pool_entryt &entry = pool_entry(bootstrap_methodhandle_ref);
1979
1980 method_handle_infot method_handle{entry};
1981
1982 const u2 num_bootstrap_arguments = read<u2>();
1983 debug() << "INFO: parse BootstrapMethod handle " << num_bootstrap_arguments
1984 << " #args" << eom;
1985
1986 // read u2 values of entry into vector
1987 std::vector<u2> u2_values(num_bootstrap_arguments);
1988 for(size_t i = 0; i < num_bootstrap_arguments; i++)
1989 u2_values[i] = read<u2>();
1990
1991 // try parsing bootstrap method handle
1992 // each entry contains a MethodHandle structure
1993 // u2 tag
1994 // u2 reference kind which must be in the range from 1 to 9
1995 // u2 reference index into the constant pool
1996 //
1997 // reference kinds use the following
1998 // 1 to 4 must point to a CONSTANT_Fieldref structure
1999 // 5 or 8 must point to a CONSTANT_Methodref structure
2000 // 6 or 7 must point to a CONSTANT_Methodref or
2001 // CONSTANT_InterfaceMethodref structure, if the class file version
2002 // number is 52.0 or above, to a CONSTANT_Methodref only in the case
2003 // of less than 52.0
2004 // 9 must point to a CONSTANT_InterfaceMethodref structure
2005
2006 // the index must point to a CONSTANT_String
2007 // CONSTANT_Class
2008 // CONSTANT_Integer
2009 // CONSTANT_Long
2010 // CONSTANT_Float
2011 // CONSTANT_Double
2012 // CONSTANT_MethodHandle
2013 // CONSTANT_MethodType
2014
2015 // We read the three arguments here to see whether they correspond to
2016 // our hypotheses for this being a lambda function entry.
2017
2018 // Need at least 3 arguments, the interface type, the method hanlde
2019 // and the method_type, otherwise it doesn't look like a call that we
2020 // understand
2021 if(num_bootstrap_arguments < 3)
2022 {
2023 store_unknown_method_handle(bootstrap_method_index);
2024 debug()
2025 << "format of BootstrapMethods entry not recognized: too few arguments"
2026 << eom;
2027 continue;
2028 }
2029
2030 u2 interface_type_index = u2_values[0];
2031 u2 method_handle_index = u2_values[1];
2032 u2 method_type_index = u2_values[2];
2033
2034 // The additional arguments for the altmetafactory call are skipped,
2035 // as they are currently not used. We verify though that they are of
2036 // CONSTANT_Integer type, cases where this does not hold will be
2037 // analyzed further.
2038 bool recognized = true;
2039 for(size_t i = 3; i < num_bootstrap_arguments; i++)
2040 {
2041 u2 skipped_argument = u2_values[i];
2042 recognized &= pool_entry(skipped_argument).tag == CONSTANT_Integer;
2043 }
2044
2045 if(!recognized)
2046 {
2047 debug() << "format of BootstrapMethods entry not recognized: extra "
2048 "arguments of wrong type"
2049 << eom;
2050 store_unknown_method_handle(bootstrap_method_index);
2051 continue;
2052 }
2053
2054 const pool_entryt &interface_type_argument =
2055 pool_entry(interface_type_index);
2056 const pool_entryt &method_handle_argument = pool_entry(method_handle_index);
2057 const pool_entryt &method_type_argument = pool_entry(method_type_index);
2058
2059 if(
2060 interface_type_argument.tag != CONSTANT_MethodType ||
2061 method_handle_argument.tag != CONSTANT_MethodHandle ||
2062 method_type_argument.tag != CONSTANT_MethodType)
2063 {
2064 debug() << "format of BootstrapMethods entry not recognized: arguments "
2065 "wrong type"
2066 << eom;
2067 store_unknown_method_handle(bootstrap_method_index);
2068 continue;
2069 }
2070
2071 debug() << "INFO: parse lambda handle" << eom;
2073 parse_method_handle(method_handle_infot{method_handle_argument});
2074
2075 if(!lambda_method_handle.has_value())
2076 {
2077 debug() << "format of BootstrapMethods entry not recognized: method "
2078 "handle not recognised"
2079 << eom;
2080 store_unknown_method_handle(bootstrap_method_index);
2081 continue;
2082 }
2083
2084 // If parse_method_handle can't parse the lambda method, it should return {}
2086 lambda_method_handle->handle_type != method_handle_typet::UNKNOWN_HANDLE);
2087
2088 debug() << "lambda function reference "
2089 << id2string(lambda_method_handle->get_method_descriptor()
2090 .base_method_name())
2091 << " in class \"" << parse_tree.parsed_class.name << "\""
2092 << "\n interface type is "
2093 << id2string(pool_entry(interface_type_argument.ref1).s)
2094 << "\n method type is "
2095 << id2string(pool_entry(method_type_argument.ref1).s) << eom;
2097 bootstrap_method_index, *lambda_method_handle);
2098 }
2099}
2100
2105 size_t bootstrap_method_index)
2106{
2110 bootstrap_method_index, lambda_method_handle);
2111}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
struct bytecode_infot const bytecode_info[]
#define BC_wide
int16_t s2
int8_t s1
int32_t s4
uint16_t u2
uint8_t u1
uint64_t u8
uint32_t u4
Array constructor from list of elements.
Definition std_expr.h:1476
Arrays with given size.
Definition std_types.h:763
class_infot get_class(const pool_entry_lookupt &pool_entry) const
name_and_type_infot get_name_and_type(const pool_entry_lookupt &pool_entry) const
u2 get_name_and_type_index() const
base_ref_infot(const pool_entryt &entry)
Corresponds to the CONSTANT_Class_info Structure Described in Java 8 specification 4....
class_infot(const pool_entryt &entry)
std::string get_name(const pool_entry_lookupt &pool_entry) const
An expression describing a method on a class.
Definition std_expr.h:3272
const parameterst & parameters() const
Definition std_types.h:655
const typet & return_type() const
Definition std_types.h:645
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:37
bool empty() const
Definition dstring.h:88
const char * c_str() const
Definition dstring.h:99
Base class for all expressions.
Definition expr.h:54
std::vector< exprt > operandst
Definition expr.h:56
typet & type()
Return the type of the expression.
Definition expr.h:82
const irep_idt & get(const irep_idt &name) const
Definition irep.cpp:45
const irep_idt & id() const
Definition irep.h:396
pool_entryt & pool_entry(u2 index)
void get_annotation_class_refs(const std::vector< annotationt > &annotations)
For each of the given annotations, get a reference to its class and recursively get class references ...
java_bytecode_parsert(bool skip_instructions)
void store_unknown_method_handle(size_t bootstrap_method_index)
Creates an unknown method handle and puts it into the parsed_class.
exprt get_relement_value()
Corresponds to the element_value structure Described in Java 8 specification 4.7.16....
java_bytecode_parse_treet::annotationt annotationt
java_bytecode_parse_treet::methodt methodt
void rRuntimeAnnotation_attribute(std::vector< annotationt > &)
const typet type_entry(u2 index)
optionalt< lambda_method_handlet > parse_method_handle(const class method_handle_infot &entry)
Read method handle pointed to from constant pool entry at index, return type of method handle and nam...
void rbytecode(std::vector< instructiont > &)
void rcode_attribute(methodt &method)
void read_bootstrapmethods_entry()
Read all entries of the BootstrapMethods attribute of a class file.
void get_class_refs()
Get the class references for the benefit of a dependency analysis.
void get_annotation_value_class_refs(const exprt &value)
See java_bytecode_parsert::get_annotation_class_refs.
java_bytecode_parse_treet parse_tree
std::vector< pool_entryt > constant_poolt
void skip_bytes(std::size_t bytes)
void read_verification_type_info(methodt::verification_type_infot &)
java_bytecode_parse_treet::fieldt fieldt
void get_class_refs_rec(const typet &)
void parse_local_variable_type_table(methodt &method)
Parses the local variable type table of a method.
void rRuntimeAnnotation(annotationt &)
void relement_value_pairs(annotationt::element_value_pairst &)
void rinner_classes_attribute(const u4 &attribute_length)
Corresponds to the element_value structure Described in Java 8 specification 4.7.6 https://docs....
std::vector< irep_idt > rexceptions_attribute()
Corresponds to the element_value structure Described in Java 8 specification 4.7.5 https://docs....
void rmethod_attribute(methodt &method)
method_handle_kindt
Indicates what sort of code should be synthesised for a lambda call:
Definition java_types.h:465
@ LAMBDA_STATIC_METHOD_HANDLE
Direct call to the given method.
@ LAMBDA_VIRTUAL_METHOD_HANDLE
Virtual call to the given interface or method.
@ LAMBDA_CONSTRUCTOR_HANDLE
Instantiate the needed type then call a constructor.
mstreamt & error() const
Definition message.h:399
mstreamt & debug() const
Definition message.h:429
virtual void set_message_handler(message_handlert &_message_handler)
Definition message.h:179
mstreamt & result() const
Definition message.h:409
static eomt eom
Definition message.h:297
method_handle_infot(const pool_entryt &entry)
base_ref_infot get_reference(const pool_entry_lookupt &pool_entry) const
method_handle_kindt handle_kind
method_handle_kindt get_handle_kind() const
method_handle_kindt
Correspond to the different valid values for field handle_kind From Java 8 spec 4....
Corresponds to the CONSTANT_NameAndType_info Structure Described in Java 8 specification 4....
name_and_type_infot(const pool_entryt &entry)
std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
std::string get_name(const pool_entry_lookupt &pool_entry) const
std::istream * in
Definition parser.h:26
Fixed-width bit-vector with two's complement interpretation.
const irep_idt & get_function() const
void set_java_bytecode_index(const irep_idt &index)
void set_line(const irep_idt &line)
void set_function(const irep_idt &function)
A struct tag type, i.e., struct_typet with an identifier.
Definition std_types.h:449
Structure type, corresponds to C style structs.
Definition std_types.h:231
const componentst & components() const
Definition std_types.h:147
std::function< pool_entryt &(u2)> pool_entry_lookupt
static std::string read_utf8_constant(const pool_entryt &entry)
structured_pool_entryt(const pool_entryt &entry)
static symbol_exprt typeless(const irep_idt &id)
Generate a symbol_exprt without a proper type.
Definition std_expr.h:99
const irep_idt & get_identifier() const
Definition std_types.h:410
An expression denoting a type.
Definition std_expr.h:2771
The type of an expression, extends irept.
Definition type.h:29
const typet & subtype() const
Definition type.h:48
Fixed-width bit-vector with unsigned binary interpretation.
static format_containert< T > format(const T &o)
Definition format.h:37
const std::string & id2string(const irep_idt &d)
Definition irep.h:47
#define CONSTANT_Fieldref
#define CONSTANT_String
#define VTYPE_INFO_DOUBLE
#define UNUSED_u2(x)
#define ACC_PUBLIC
#define CONSTANT_InvokeDynamic
#define VTYPE_INFO_FLOAT
#define ACC_BRIDGE
#define ACC_NATIVE
#define T_SHORT
#define ACC_ENUM
#define CONSTANT_Methodref
#define T_LONG
#define T_FLOAT
#define ACC_VARARGS
#define CONSTANT_Long
#define T_INT
#define VTYPE_INFO_LONG
#define VTYPE_INFO_TOP
#define CONSTANT_NameAndType
#define ACC_ABSTRACT
#define VTYPE_INFO_OBJECT
#define ACC_ANNOTATION
optionalt< java_bytecode_parse_treet > java_bytecode_parse(std::istream &istream, const irep_idt &class_name, message_handlert &message_handler, bool skip_instructions)
Attempt to parse a Java class from the given stream.
#define T_DOUBLE
#define ACC_SYNCHRONIZED
#define T_BYTE
#define CONSTANT_Class
#define ACC_STATIC
#define CONSTANT_Integer
#define CONSTANT_MethodType
#define VTYPE_INFO_INTEGER
static java_class_typet::method_handle_kindt get_method_handle_type(method_handle_infot::method_handle_kindt java_handle_kind)
Translate the lambda method reference kind in a class file into the kind of handling the method requi...
#define ACC_PROTECTED
#define CONSTANT_Double
#define ACC_PRIVATE
#define CONSTANT_Float
#define VTYPE_INFO_UNINIT
#define CONSTANT_InterfaceMethodref
#define T_BOOLEAN
#define ACC_INTERFACE
#define ACC_SYNTHETIC
#define VTYPE_INFO_UNINIT_THIS
#define ACC_FINAL
#define CONSTANT_MethodHandle
#define T_CHAR
#define VTYPE_INFO_ITEM_NULL
#define CONSTANT_Utf8
Representation of a constant Java string.
void get_dependencies_from_generic_parameters(const std::string &signature, std::set< irep_idt > &refs)
Collect information about generic type parameters from a given signature.
optionalt< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
struct_tag_typet java_classname(const std::string &id)
bool is_java_array_tag(const irep_idt &tag)
See above.
const typet & java_array_element_type(const struct_tag_typet &array_symbol)
Return a const reference to the element type of a given java array type.
const java_method_typet & to_java_method_type(const typet &type)
Definition java_types.h:184
optionalt< typet > java_type_from_string_with_exception(const std::string &descriptor, const optionalt< std::string > &signature, const std::string &class_name)
static optionalt< java_class_typet::java_lambda_method_handlet > lambda_method_handle(const symbol_tablet &symbol_table, const irep_idt &method_identifier, const java_method_typet &dynamic_method_type)
nonstd::optional< T > optionalt
Definition optional.h:35
Parser utilities.
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:510
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
#define POSTCONDITION(CONDITION)
Definition invariant.h:479
API to expression classes.
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition std_types.h:308
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition std_types.h:474
Definition kdev_t.h:19
std::vector< element_value_pairt > element_value_pairst
void add_method_handle(size_t bootstrap_index, const lambda_method_handlet &handle)
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.