1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
//! This module handles the string processing at external symbol calls.

use regex::Regex;
use std::collections::BTreeMap;

use crate::abstract_domain::{
    AbstractIdentifier, DomainInsertion, HasTop, IntervalDomain, TryToBitvec,
};
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::intermediate_representation::{Bitvector, Datatype};
use crate::{abstract_domain::AbstractDomain, intermediate_representation::ExternSymbol};

use super::super::state::State;
use super::Context;

mod memcpy;
mod scanf;
mod sprintf;
mod strcat;

impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Context<'a, T> {
    /// Handles a call to an external symbol that receives no specialised treatment.
    ///
    /// The incoming `state` is left untouched. A copy of it is returned on which
    /// `remove_non_callee_saved_pointer_entries_for_external_symbol` has been run
    /// for `extern_symbol`, i.e. all non callee saved pointer entries are deleted.
    pub fn handle_generic_symbol_calls(
        &self,
        extern_symbol: &ExternSymbol,
        state: &State<T>,
    ) -> State<T> {
        let mut successor_state = state.clone();
        let project = self.project;
        successor_state
            .remove_non_callee_saved_pointer_entries_for_external_symbol(project, extern_symbol);

        successor_state
    }

    /// Handles calls to external symbols for which no ExternSymbol object is known.
    ///
    /// If the project provides a standard calling convention, every entry of the
    /// variable-to-pointer map whose register is not contained in the convention's
    /// `callee_saved_register` list is dropped from `state`.
    /// If no standard calling convention is available, `state` is left unchanged.
    pub fn handle_unknown_symbol_calls(&self, state: &mut State<T>) {
        if let Some(standard_cconv) = self.project.get_standard_calling_convention() {
            // Clone the map a single time and filter the copy in place instead of
            // cloning it twice and removing entries one by one.
            let mut filtered_map = state.get_variable_to_pointer_map().clone();
            filtered_map
                .retain(|register, _| standard_cconv.callee_saved_register.contains(register));

            state.set_variable_to_pointer_map(filtered_map);
        }
    }

    /// Dispatches a call to a string related external symbol to its dedicated handler.
    ///
    /// The output of a string symbol is added to the map of abstract strings.
    /// If the symbol returns a format string, the string is approximated
    /// as well as possible by checking the input parameters.
    /// After the handler has run, all non callee saved pointer entries are removed
    /// from the resulting state, which is then returned.
    ///
    /// # Panics
    ///
    /// Panics if the name of `extern_symbol` is not one of the symbol names
    /// matched below.
    pub fn handle_string_symbol_calls(
        &self,
        extern_symbol: &ExternSymbol,
        state: &State<T>,
    ) -> State<T> {
        let symbol_name = extern_symbol.name.as_str();
        let mut successor_state = match symbol_name {
            "scanf" | "__isoc99_scanf" => self.handle_scanf_calls(state, extern_symbol),
            "sscanf" | "__isoc99_sscanf" => self.handle_sscanf_calls(state, extern_symbol),
            "sprintf" | "snprintf" | "vsprintf" | "vsnprintf" => {
                self.handle_sprintf_and_snprintf_calls(state, extern_symbol)
            }
            "strcat" | "strncat" => self.handle_strcat_and_strncat_calls(state, extern_symbol),
            "memcpy" => self.handle_memcpy_calls(state, extern_symbol),
            "free" => self.handle_free(state, extern_symbol),
            _ => panic!("Unexpected Extern Symbol."),
        };

        // The call may clobber every register that is not callee saved,
        // so the corresponding pointer entries are dropped for all handlers alike.
        let project = self.project;
        successor_state
            .remove_non_callee_saved_pointer_entries_for_external_symbol(project, extern_symbol);

        successor_state
    }

    /// Registers `domain_input_string` for every target of `pointer`.
    ///
    /// * A target equal to the stack ID of `pi_state` gets a stack-offset-to-string
    ///   entry, provided its offset can be converted to a concrete offset.
    ///   Stack targets without a concrete offset are skipped.
    /// * Every other target gets a heap-to-string entry keyed by the target itself.
    pub fn add_new_string_abstract_domain(
        state: &mut State<T>,
        pi_state: &PointerInferenceState,
        pointer: &BTreeMap<AbstractIdentifier, IntervalDomain>,
        domain_input_string: T,
    ) {
        for (target, offset) in pointer {
            if *target != pi_state.stack_id {
                // Not the current stack frame: track the string via the heap map.
                state.add_new_heap_to_string_entry(target.clone(), domain_input_string.clone());
                continue;
            }

            if let Ok(stack_offset) = offset.try_to_offset() {
                state
                    .add_new_stack_offset_to_string_entry(stack_offset, domain_input_string.clone());
            }
        }
    }

    /// Regex that filters format specifier from a format string.
    ///
    /// A match consists of a `%`, up to two decimal digits and a conversion
    /// specifier (optionally with a length modifier). The specifier is available
    /// as capture group 1.
    ///
    /// The single character specifiers are listed without separating commas:
    /// inside a character class a comma is a literal, so a comma separated list
    /// would also accept `%,` as a format specifier.
    ///
    /// # Panics
    ///
    /// Panics if the hard coded pattern is not a valid regular expression.
    pub fn re_format_specifier() -> Regex {
        Regex::new(r"%\d{0,2}([cCdiouxXeEfFgGaAnpsS]|hi|hd|hu|li|ld|lu|lli|lld|llu|lf|lg|le|la|lF|lG|lE|lA|Lf|Lg|Le|La|LF|LG|LE|LA)").expect("No valid regex!")
    }

    /// Merges domains from multiple pointer targets. The merged domain serves as input to a format string.
    ///
    /// For each target of `pointer` the corresponding string domain is looked up:
    /// targets equal to the stack ID of `pi_state` are looked up by their offset in the
    /// stack-offset-to-string map, all other targets in the heap-to-string map.
    /// The domains found are merged in iteration order of `pointer`.
    ///
    /// A *Top* value is returned, as no assumption can be made about the input, if
    /// * one of the targets does not contain a domain,
    /// * the offset of a stack target cannot be parsed, or
    /// * `pointer` has no targets at all.
    pub fn merge_domains_from_multiple_pointer_targets(
        state: &State<T>,
        pi_state: &PointerInferenceState,
        pointer: &BTreeMap<AbstractIdentifier, IntervalDomain>,
    ) -> T {
        let mut merged_domain: Option<T> = None;
        for (target, offset) in pointer.iter() {
            let domain = if pi_state.stack_id == *target {
                // Check the stack offset map if the target points to a stack position.
                let offset_value = match offset.try_to_offset() {
                    Ok(offset_value) => offset_value,
                    Err(_) => return T::create_top_value_domain(),
                };
                match state.get_stack_offset_to_string_map().get(&offset_value) {
                    Some(domain) => domain.clone(),
                    None => return T::create_top_value_domain(),
                }
            } else {
                // Check the heap map if the target points to a heap position.
                match state.get_heap_to_string_map().get(target) {
                    Some(domain) => domain.clone(),
                    None => return T::create_top_value_domain(),
                }
            };

            merged_domain = Some(match merged_domain {
                Some(accumulated) => accumulated.merge(&domain),
                None => domain,
            });
        }

        // An empty target set yields no domain at all; fall back to Top instead of panicking.
        merged_domain.unwrap_or_else(|| T::create_top_value_domain())
    }

    /// Selects the domain approximation that belongs to a format specifier.
    ///
    /// The `specifier` is converted into a `Datatype` and the matching
    /// constructor of the string domain `T` is called.
    ///
    /// # Panics
    ///
    /// Panics if the resulting data type is none of the variants handled below.
    pub fn approximate_string_domain_from_datatype(specifier: String) -> T {
        let datatype = Datatype::from(specifier);
        match datatype {
            Datatype::Char => T::create_char_domain(),
            Datatype::Integer => T::create_integer_domain(),
            Datatype::Pointer => T::create_pointer_value_domain(),
            // NOTE(review): `Long` and `LongLong` are approximated with the float value
            // domain just like the floating point types - confirm that this is intended.
            Datatype::Double | Datatype::LongDouble | Datatype::Long | Datatype::LongLong => {
                T::create_float_value_domain()
            }
            _ => panic!("Invalid data type specifier from format string."),
        }
    }

    /// Builds the domain for an integer constant that is inserted into a format string.
    ///
    /// Returns the domain created from the decimal string representation of `constant`,
    /// or `None` if the constant cannot be converted to an `i64`.
    pub fn get_constant_integer_domain(constant: Bitvector) -> Option<T> {
        constant
            .try_to_i64()
            .ok()
            .map(|integer| T::from(integer.to_string()))
    }

    /// Builds the domain for a char constant that is inserted into a format string.
    ///
    /// The constant is first handed to the runtime memory image to read a value of
    /// char size (presumably treating the constant as an address - confirm against `read`).
    /// If that read yields a value, the value is decoded as the char.
    /// Otherwise the constant itself is decoded as the char code.
    /// Returns `None` if the chosen bitvector cannot be parsed to a char.
    pub fn get_constant_char_domain(&self, constant: Bitvector) -> Option<T> {
        let char_size = self
            .project
            .datatype_properties
            .get_size_from_data_type(Datatype::Char);

        // Prefer the value found in memory; fall back to the raw constant.
        let char_code = match self.project.runtime_memory_image.read(&constant, char_size) {
            Ok(Some(code_from_memory)) => code_from_memory,
            _ => constant.clone(),
        };

        Context::<T>::parse_bitvec_to_char(char_code).map(|c_char| T::from(c_char.to_string()))
    }

    /// Parses a bitvector to a char if possible.
    ///
    /// Returns `None` if the bitvector cannot be converted to a `u32`
    /// or if the resulting `u32` is not a valid `char`.
    pub fn parse_bitvec_to_char(char_code: Bitvector) -> Option<char> {
        char_code.try_to_u32().ok().and_then(std::char::from_u32)
    }

    /// Builds the domain for a string constant that is inserted into a format string.
    ///
    /// The runtime memory image is asked for the null terminated string belonging to `constant`.
    /// Returns the domain created from that string, or `None` if the string
    /// cannot be read or is empty.
    pub fn get_constant_string_domain(&self, constant: Bitvector) -> Option<T> {
        let read_result = self
            .project
            .runtime_memory_image
            .read_string_until_null_terminator(&constant);

        match read_result {
            Ok(string) if !string.is_empty() => Some(T::from(string.to_string())),
            _ => None,
        }
    }

    /// Deletes string entries in the heap to string map if the corresponding pointer is used
    /// to free memory space.
    ///
    /// The first parameter of `extern_symbol` is evaluated with the pointer inference state.
    /// Every relative target of the evaluated value that has an entry in the heap to string
    /// map is removed from the returned copy of `state`.
    /// An unmodified copy of `state` is returned if the symbol has no parameter,
    /// no pointer inference state is available or the parameter cannot be evaluated.
    pub fn handle_free(&self, state: &State<T>, extern_symbol: &ExternSymbol) -> State<T> {
        let mut successor_state = state.clone();

        let freed_arg = match extern_symbol.parameters.first() {
            Some(arg) => arg,
            None => return successor_state,
        };
        let pi_state = match state.get_pointer_inference_state() {
            Some(pi_state) => pi_state,
            None => return successor_state,
        };
        let freed_pointer =
            match pi_state.eval_parameter_arg(freed_arg, &self.project.runtime_memory_image) {
                Ok(pointer) => pointer,
                Err(_) => return successor_state,
            };

        let tracked_heap_strings = state.get_heap_to_string_map();
        for (target, _) in freed_pointer.get_relative_values().iter() {
            if tracked_heap_strings.contains_key(target) {
                successor_state.remove_heap_to_string_entry(target);
            }
        }

        successor_state
    }
}

#[cfg(test)]
pub mod tests;