1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
use super::Context;
use crate::abstract_domain::{AbstractIdentifier, DataDomain, IntervalDomain, TryToBitvec};
use crate::prelude::*;

/// This struct contains the computed bound for an object.
/// If the object is a parameter object,
/// it also contains metadata about the source object used to determine the bound for the parameter object.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct BoundsMetadata {
    /// The source object (and the offset into the source object that the object points to)
    /// if the bound of the memory object is derived from another object (e.g. for parameter objects).
    pub source: Option<DataDomain<IntervalDomain>>,
    /// The resulting bound for the memory object.
    pub resulting_bound: i64,
}

impl BoundsMetadata {
    /// Create a new instance without source metadata.
    pub fn new(resulting_bound: i64) -> BoundsMetadata {
        BoundsMetadata {
            source: None,
            resulting_bound,
        }
    }

    /// Create an instance where the source of the bound is given by `id + offset`.
    pub fn from_source(
        id: &AbstractIdentifier,
        offset: &IntervalDomain,
        resulting_bound: i64,
    ) -> BoundsMetadata {
        BoundsMetadata {
            source: Some(DataDomain::from_target(id.clone(), offset.clone())),
            resulting_bound,
        }
    }
}

/// If `bound` is `None`, replace it with `new_bound`.
/// Else only replace it if the bound in `new_bound` is smaller than the existing bound.
fn replace_if_smaller_bound(bound: &mut Option<BoundsMetadata>, new_bound: BoundsMetadata) {
    if let Some(old_bound) = bound {
        if old_bound.resulting_bound > new_bound.resulting_bound {
            *bound = Some(new_bound);
        }
    } else {
        *bound = Some(new_bound);
    }
}

/// If `bound` is `None`, replace it with `new_bound`.
/// Else only replace it if the bound in `new_bound` is larger than the existing bound.
fn replace_if_larger_bound(bound: &mut Option<BoundsMetadata>, new_bound: BoundsMetadata) {
    if let Some(old_bound) = bound {
        if old_bound.resulting_bound < new_bound.resulting_bound {
            *bound = Some(new_bound);
        }
    } else {
        *bound = Some(new_bound);
    }
}

impl<'a> Context<'a> {
    /// Compute the bounds of the memory object associated with the given parameter ID.
    ///
    /// Since the memory object associated to a parameter may not be unique
    /// the bounds are only approximated from those objects where exact bounds could be determined.
    /// If different objects were found the bounds are approximated by the strictest bounds that were found.
    fn compute_bounds_of_param_id(
        &self,
        param_object_id: &AbstractIdentifier,
    ) -> (Option<BoundsMetadata>, Option<BoundsMetadata>) {
        let object_data = self.recursively_substitute_param_values(&DataDomain::from_target(
            param_object_id.clone(),
            Bitvector::zero(param_object_id.bytesize().into()).into(),
        ));
        let mut lower_bound: Option<BoundsMetadata> = None;
        let mut upper_bound: Option<BoundsMetadata> = None;

        for (id, offset) in object_data.get_relative_values() {
            // Right now we ignore cases where we do not know the exact offset into the object.
            let concrete_offset = match offset.try_to_offset() {
                Ok(offset) => offset,
                Err(_) => continue,
            };
            if self
                .malloc_tid_to_object_size_map
                .contains_key(id.get_tid())
            {
                replace_if_larger_bound(
                    &mut lower_bound,
                    BoundsMetadata::from_source(id, offset, -concrete_offset),
                );
                let object_size = self.compute_size_of_heap_object(id);
                if let Ok(concrete_object_size) = object_size.try_to_offset() {
                    replace_if_smaller_bound(
                        &mut upper_bound,
                        BoundsMetadata::from_source(
                            id,
                            offset,
                            concrete_object_size - concrete_offset,
                        ),
                    );
                }
            } else if self.is_stack_frame_id(id) {
                let stack_frame_upper_bound = self
                    .function_signatures
                    .get(id.get_tid())
                    .unwrap()
                    .get_stack_params_total_size(&self.project.stack_pointer_register);
                replace_if_smaller_bound(
                    &mut upper_bound,
                    BoundsMetadata::from_source(
                        id,
                        offset,
                        stack_frame_upper_bound - concrete_offset,
                    ),
                );
                // We do not set a lower bound since we do not know the concrete call site for stack pointers,
                // which we would need to determine a correct lower bound.
            }
            // FIXME: Cases not handled here include unresolved parameter IDs, unknown IDs and global pointers.
            // For the first two we do not have any size information.
            // For global pointers we need some kind of pre-analysis so that we do not have to assume
            // that the pointer may address the complete range of global data addresses.
        }
        (lower_bound, upper_bound)
    }

    /// Compute the bounds of a memory object given by the provided `object_id`.
    ///
    /// Returns `(lower_bound, upper_bound)`, where the bounds may be `None` if they could not be determined.
    pub fn compute_bounds_of_id(
        &self,
        object_id: &AbstractIdentifier,
        current_stack_frame_id: &AbstractIdentifier,
    ) -> (Option<BoundsMetadata>, Option<BoundsMetadata>) {
        // FIXME: The malloc-tid-to-object-size-map check does not work anymore,
        // because we do not use path hints in the PointerInference anymore.
        if self
            .malloc_tid_to_object_size_map
            .contains_key(object_id.get_tid())
        {
            let object_size = self.compute_size_of_heap_object(object_id);
            if let Ok(object_size) = object_size.try_to_offset() {
                (
                    Some(BoundsMetadata::new(0)),
                    Some(BoundsMetadata::new(object_size)),
                )
            } else {
                (Some(BoundsMetadata::new(0)), None)
            }
        } else if object_id == current_stack_frame_id {
            let stack_frame_upper_bound = self
                .function_signatures
                .get(object_id.get_tid())
                .unwrap()
                .get_stack_params_total_size(&self.project.stack_pointer_register);
            (None, Some(BoundsMetadata::new(stack_frame_upper_bound)))
        } else if object_id.get_tid() == current_stack_frame_id.get_tid()
            && object_id.get_path_hints().is_empty()
        {
            // Handle parameter IDs
            self.compute_bounds_of_param_id(object_id)
        } else {
            // The type of object is unknown, thus the size restrictions are also unknown.
            (None, None)
        }
    }
}

#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::analysis::pointer_inference::Data;
    use crate::{bitvec, intermediate_representation::parsing};
    use std::collections::{HashMap, HashSet};

    #[test]
    fn test_compute_bounds_of_param_id() {
        let mut context = Context::mock_x64();
        let param_id = AbstractIdentifier::mock("func", "RDI", 8);
        let param_id_2 = AbstractIdentifier::mock("func", "RSI", 8);
        let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
        let callsite_id_2 = AbstractIdentifier::mock("callsite_id", "RSI", 8);
        let malloc_call_id = AbstractIdentifier::mock("malloc_call", "RAX", 8);
        let main_stack_id = AbstractIdentifier::mock("main", "RSP", 8);

        let param_value = Data::from_target(malloc_call_id.clone(), bitvec!("2:8").into());
        let param_value_2 = Data::from_target(main_stack_id.clone(), bitvec!("-10:8").into());
        let param_replacement_map = HashMap::from([
            (callsite_id, param_value.clone()),
            (callsite_id_2, param_value_2.clone()),
        ]);
        let callee_to_callsites_map =
            HashMap::from([(Tid::new("func"), HashSet::from([Tid::new("callsite_id")]))]);
        context.param_replacement_map = param_replacement_map;
        context.callee_to_callsites_map = callee_to_callsites_map;
        context
            .malloc_tid_to_object_size_map
            .insert(Tid::new("malloc_call"), Data::from(bitvec!("42:8")));
        context.call_to_caller_fn_map = HashMap::from([
            (Tid::new("malloc_call"), Tid::new("main")),
            (Tid::new("callsite_id"), Tid::new("main")),
        ]);
        // Test bound computation if the param gets resolved to a heap object
        let (lower_bound, upper_bound) = context.compute_bounds_of_param_id(&param_id);
        assert_eq!(lower_bound.unwrap().resulting_bound, -2);
        assert_eq!(upper_bound.unwrap().resulting_bound, 40);
        // Test bound computation if the param gets resolved to a caller stack frame
        let (lower_bound, upper_bound) = context.compute_bounds_of_param_id(&param_id_2);
        assert_eq!(lower_bound, None);
        assert_eq!(upper_bound.unwrap().resulting_bound, 10);
    }
}