38 auto original_goto_model =
41 if(!original_goto_model.has_value())
43 log.error() <<
"Unable to read goto binary for linker script merging"
49 std::list<irep_idt> linker_defined_symbols;
51 linker_defined_symbols,
52 original_goto_model->symbol_table,
54 linker_def_outfile());
79 original_goto_model->symbol_table,
83 log.error() <<
"Could not add linkerscript defs to symbol table"
91 *original_goto_model,
log.get_message_handler());
105 log.error() <<
"Could not pointerize all linker-defined expressions"
112 *original_goto_model,
113 cmdline.isset(
"validate-goto-model"),
114 log.get_message_handler());
118 log.error() <<
"Could not write linkerscript-augmented binary"
133 log(message_handler),
136 "address of array's first member",
142 [](
const exprt &expr)
144 return expr.
id() == ID_address_of &&
145 expr.
type().
id() == ID_pointer &&
153 .
id() == ID_symbol &&
165 .
id() == ID_signedbv;
171 [](
const exprt &expr)
173 return expr.
id() == ID_address_of &&
174 expr.
type().
id() == ID_pointer &&
183 [](
const exprt &expr)
185 return expr.
id() == ID_address_of &&
186 expr.
type().
id() == ID_pointer &&
197 [](
const exprt &expr)
198 {
return expr.
id() == ID_symbol && expr.
type().
id() == ID_array; }),
200 "pointer (does not need pointerizing)",
203 [](
const exprt &expr)
204 {
return expr.
id() == ID_symbol && expr.
type().
id() == ID_pointer; })})
221 std::list<symbol_exprt> to_pointerize;
224 if(to_pointerize.empty())
226 log.debug() <<
"Pointerizing the symbol-table value of symbol " << it->first
229 it.get_writeable_symbol().value, to_pointerize, linker_values);
230 if(to_pointerize.empty() && fail==0)
233 for(
const auto &sym : to_pointerize)
235 log.error() <<
" Could not pointerize '" << sym.get_identifier()
236 <<
"' in symbol table entry " << it->first <<
". Pretty:\n"
237 << sym.pretty() <<
"\n";
249 std::list<exprt *> expressions = {&instruction.code_nonconst()};
250 if(instruction.has_condition())
251 expressions.push_back(&instruction.condition_nonconst());
253 for(
exprt *insts : expressions)
255 std::list<symbol_exprt> to_pointerize;
257 if(to_pointerize.empty())
261 if(to_pointerize.empty() && fail==0)
264 for(
const auto &sym : to_pointerize)
266 log.error() <<
" Could not pointerize '" << sym.get_identifier()
267 <<
"' in function " << gf.first <<
". Pretty:\n"
268 << sym.pretty() <<
"\n";
269 log.error().source_location = instruction.source_location();
283 const std::string &shape)
285 auto it=linker_values.find(ident);
286 if(it==linker_values.end())
288 log.error() <<
"Could not find a new expression for linker script-defined "
294 log.debug() <<
"Pointerizing linker-defined symbol '" << ident
302 std::list<symbol_exprt> &to_pointerize,
306 for(
auto const &pair : linker_values)
309 if(!pattern.match(expr))
312 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
315 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
316 pattern.description());
318 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
320 if(result==to_pointerize.end())
327 to_pointerize.erase(result);
346 std::list<symbol_exprt> &to_pointerize)
const
348 for(
const auto &op : expr.
operands())
350 if(op.id()!=ID_symbol)
354 if(pair!=linker_values.end())
355 to_pointerize.push_back(sym_exp);
357 for(
const auto &op : expr.
operands())
362The current implementation of
this function is less precise than the
363 commented-out version below. To understand the difference between these
364 implementations, consider the following example:
366Suppose we have a section in the linker script, 100 bytes long, where the
367address of the symbol sec_start is the start of the section (value 4096) and the
368address of sec_end is the end of that section (value 4196).
370The current implementation synthesizes the
goto-version of the following C:
372 char __sec_array[100];
373 char *sec_start=(&__sec_array[0]);
374 char *sec_end=((&__sec_array[0])+100);
378This is imprecise
for the following reason: the actual address of the array and
379the pointers shall be some random CBMC-internal address, instead of being 4096
380and 4196. The linker script, on the other hand, would have specified the exact
381position of the section, and we even know what the actual values of sec_start
382and sec_end are from the
object file (these values are in the `addresses` list
383of the `
data` argument to
this function). If the correctness of the code depends
384on these actual values, then CBMCs model of the code is too imprecise to verify
387The commented-out version of
this function below synthesizes the following:
389 char *sec_start=4096;
393This code has both the actual addresses of the start and end of the section and
394tells CBMC that the intermediate region is valid. However, the allocated_memory
395macro does not currently allocate an actual
object at the address 4096, so
396symbolic execution can fail. In particular, the
'allocated memory' is part of
397__CPROVER_memory, which does not have a bounded size;
this means that (
for
398example) calls to memcpy or memset fail, because the first and third arguments
399do not have know n size. The commented-out implementation should be reinstated
402In either
case,
no other changes to the rest of the code (outside
this function)
403should be necessary. The rest of
this file converts expressions containing the
404linker-defined symbol into pointer types
if they were not already, and
this is
405the right behaviour
for both implementations.
409 const std::string &linker_script,
414 std::map<irep_idt, std::size_t> truncated_symbols;
417 bool has_end=d[
"has-end-symbol"].is_true();
421 << d[
"start-symbol"].value;
430 log.warning() <<
"Object section '" << d[
"section"].value <<
"' of size "
431 << array_size <<
" is too large to model. Truncating to "
443 std::ostringstream array_comment;
444 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size "
445 << array_size <<
" bytes";
457 array_sym.
value = *zi;
460 bool failed = symbol_table.
add(array_sym);
472 start_sym.
value = array_start;
473 linker_values.emplace(
474 d[
"start-symbol"].value,
475 std::make_pair(start_sym.
symbol_expr(), array_start));
479 auto it = std::find_if(
483 return add[
"sym"].value == d[
"start-symbol"].value;
487 log.error() <<
"Start: Could not find address corresponding to symbol '"
488 << d[
"start-symbol"].value <<
"' (start of section)"
494 std::ostringstream start_comment;
495 start_comment <<
"Pointer to beginning of object section '"
496 << d[
"section"].value <<
"'. Original address in object file"
497 <<
" is " << (*it)[
"val"].value;
501 auto start_sym_entry = symbol_table.
insert(start_sym);
502 if(!start_sym_entry.second)
503 start_sym_entry.first = start_sym;
507 plus_exprt array_end(array_start, array_size_expr);
513 end_sym.
value = array_end;
514 linker_values.emplace(
515 d[
"end-symbol"].value,
518 auto entry = std::find_if(
522 return add[
"sym"].value == d[
"end-symbol"].value;
526 log.debug() <<
"Could not find address corresponding to symbol '"
527 << d[
"end-symbol"].value <<
"' (end of section)"
533 std::ostringstream end_comment;
534 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
535 <<
"'. Original address in object file"
536 <<
" is " << (*entry)[
"val"].value;
540 auto end_sym_entry = symbol_table.
insert(end_sym);
541 if(!end_sym_entry.second)
542 end_sym_entry.first = end_sym;
553 auto it=linker_values.find(
irep_idt(d[
"sym"].value));
554 if(it!=linker_values.end())
561 const auto &pair=truncated_symbols.find(d[
"sym"].value);
562 if(pair==truncated_symbols.end())
563 symbol_value=d[
"val"].value;
567 <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from "
569 <<
" because it corresponds to the size of a too-large section."
576 loc.
set_comment(
"linker script-defined symbol: char *"+
577 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
592 symbol.
value = rhs_tc;
594 linker_values.emplace(
614 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
616 f.add_source_location()=loc;
619 i.make_function_call(f);
620 initialize_instructions.push_front(i);
630 symbol_table.
add(sym);
637 loc.
set_comment(
"linker script-defined symbol: char *"+
638 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
650 linker_values.emplace(
651 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
654 assign.add_source_location()=loc;
656 assign_i.make_assignment(assign);
657 initialize_instructions.push_front(assign_i);
664 std::list<irep_idt> &linker_defined_symbols,
666 const std::string &out_file,
667 const std::string &def_out_file)
669 for(
auto const &pair : symbol_table.
symbols)
672 pair.second.is_extern && pair.second.value.is_nil() &&
675 linker_defined_symbols.push_back(pair.second.name);
679 std::ostringstream linker_def_str;
681 linker_defined_symbols.begin(),
682 linker_defined_symbols.end(),
683 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
684 log.debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n"
688 std::ofstream linker_def_file(linker_def_infile());
689 linker_def_file << linker_def_str.str();
690 linker_def_file.close();
692 std::vector<std::string> argv=
695 "--script",
cmdline.get_value(
'T'),
696 "--object", out_file,
697 "--sym-file", linker_def_infile(),
698 "--out-file", def_out_file
702 argv.push_back(
"--very-verbose");
704 argv.push_back(
"--verbose");
706 log.debug() <<
"RUN:";
707 for(std::size_t i=0; i<argv.size(); i++)
708 log.debug() <<
" " << argv[i];
711 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
719 const std::list<irep_idt> &linker_defined_symbols,
723 for(
const auto &sym : linker_defined_symbols)
724 if(linker_values.find(sym)==linker_values.end())
726 log.warning() <<
"Variable '" << sym
727 <<
"' was declared extern but never given a value, even in "
732 linker_values.emplace(sym, std::make_pair(null_sym, null_pointer));
735 for(
const auto &pair : linker_values)
737 auto it=std::find(linker_defined_symbols.begin(),
738 linker_defined_symbols.end(), pair.first);
739 if(it==linker_defined_symbols.end())
743 <<
"Linker script-defined symbol '" << pair.first <<
"' was "
744 <<
"either defined in the C source code, not declared extern in "
745 <<
"the C source code, or does not appear in the C source code"
754 if(!
data.is_object())
759 !(data_object.
find(
"regions") != data_object.
end() &&
760 data_object.
find(
"addresses") != data_object.
end() &&
761 data[
"regions"].is_array() &&
data[
"addresses"].is_array() &&
770 return address.
find(
"val") != address.
end() &&
771 address.
find(
"sym") != address.
end() &&
782 return region.
find(
"start") != region.
end() &&
783 region.
find(
"size") != region.
end() &&
784 region.
find(
"annot") != region.
end() &&
785 region.
find(
"commt") != region.
end() &&
786 region.
find(
"start-symbol") != region.
end() &&
787 region.
find(
"has-end-symbol") != region.
end() &&
788 region.
find(
"section") != region.
end() &&
793 ((region[
"has-end-symbol"].
is_true() &&
794 region.
find(
"end-symbol") != region.
end() &&
796 (region[
"has-end-symbol"].
is_false() &&
797 region.
find(
"size-symbol") != region.
end() &&
798 region.
find(
"end-symbol") == region.
end() &&
std::string array_name(const namespacet &ns, const exprt &expr)
unsignedbv_typet unsigned_int_type()
unsignedbv_typet size_type()
pointer_typet pointer_type(const typet &subtype)
bitvector_typet char_type()
Operator to return the address of an object.
A goto_instruction_codet representing an assignment in the program.
goto_instruction_codet representation of a function call statement.
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
A constant literal expression.
void set_value(const irep_idt &value)
Base class for all expressions.
bool is_constant() const
Return whether the expression is a constant.
typet & type()
Return the type of the expression.
const source_locationt & source_location() const
source_locationt & add_source_location()
function_mapt function_map
symbol_tablet symbol_table
Symbol table.
goto_functionst goto_functions
GOTO functions.
This class represents an instruction in the GOTO intermediate representation.
A generic container class for the GOTO intermediate representation of one function.
instructionst instructions
The list of instructions in the goto program.
std::list< instructiont > instructionst
const irep_idt & id() const
iterator find(const std::string &key)
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
const std::string & elf_binary
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
int ls_data2instructions(jsont &data, const std::string &linker_script, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into the symbol_table.
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
const std::string & goto_binary
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
The null pointer constant.
The plus expression Associativity is not specified.
Patterns of expressions that should be replaced.
void set_comment(const irep_idt &comment)
void set_file(const irep_idt &file)
Expression to hold a symbol (variable)
const irep_idt & get_identifier() const
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
virtual iteratort begin() override
virtual iteratort end() override
virtual std::pair< symbolt &, bool > insert(symbolt symbol) override
Author: Diffblue Ltd.
source_locationt location
Source code location of definition of symbol.
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
typet type
Type of symbol.
irep_idt pretty_name
Language-specific display name.
exprt value
Initial value of symbol.
Semantic type conversion.
static exprt conditional_cast(const exprt &expr, const typet &type)
Compile and link source and object files.
std::optional< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
Goto Programs with Functions.
const std::string & id2string(const irep_idt &d)
copy_constructor_body add(code_assign_string_expr_to_java_string(arg_this, string_expr, symbol_table, true), loc)
json_objectt & to_json_object(jsont &json)
json_arrayt & to_json_array(jsont &json)
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Merge linker script-defined symbols into a goto-program.
Magic numbers used throughout the codebase.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
const mp_integer string2integer(const std::string &n, unsigned base)
API to expression classes for Pointers.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
int run(const std::string &what, const std::vector< std::string > &argv)
#define CHECK_RETURN(CONDITION)
void recreate_initialize_function(goto_modelt &goto_model, message_handlert &message_handler)
Regenerates the CPROVER_INITIALIZE function, which initializes all non-function symbols of the goto m...
#define INITIALIZE_FUNCTION
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
unsigned safe_string2unsigned(const std::string &str, int base)
static bool failed(bool error_indicator)