1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
use crate::intermediate_representation::*;
use crate::prelude::*;
use derive_more::Deref;
use std::ops::Deref;
use std::sync::Arc;

mod location;
pub use location::AbstractLocation;
mod mem_location;
pub use mem_location::AbstractMemoryLocation;

/// An abstract identifier is used to identify an object or a value in an abstract state.
///
/// Since many program states can be represented by the same abstract state in data-flow analysis,
/// one sometimes needs a way to uniquely identify a variable or a memory object in all of the represented program states.
/// Abstract identifiers achieve this by identifying a *time*, i.e. a specific abstract state,
/// and a *location*, i.e. a recipe for computing a concrete value from any concrete state that is represented by the abstract state.
/// The value in question then serves as the identifier.
/// For example, a pointer may uniquely determine the memory object it is pointing to.
/// Or a value may represent the value of a variable at a certain time,
/// whereas the value of the variable in the current state is given as an offset to the value at the identified time.
///
/// Since program points may be visited several times during an execution trace (e.g. in loops),
/// the *time* component of an abstract identifier may not actually determine an unique point in time of an execution trace.
/// In this case the meaning of an abstract identifier depends upon its use case.
/// E.g. it may represent the union of all values at the specific *location* for each time the program point is visited during an execution trace
/// or it may only represent the value at the last time the program point was visited.
///
/// Alternatively, one can also add path hints to an identifier to further distinguish points in time in an execution trace.
/// Path hints are given as a possibly empty array of time identifiers.
/// To prevent infinitely long path hints, each time identifier is only allowed to appear at most once in the array.
/// The specific meaning of the path hints depends upon the use case.
///
/// An abstract identifier is given by a time identifier, a location identifier and a path hints array (containing time identifiers).
///
/// For the location identifier see [`AbstractLocation`].
/// The time identifier is given by a [`Tid`].
/// If it is the `Tid` of a basic block, then it describes the point in time *before* execution of the first instruction in the block.
/// If it is the `Tid` of a `Def` or `Jmp`, then it describes the point in time *after* the execution of the `Def` or `Jmp`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Deref)]
#[deref(forward)]
pub struct AbstractIdentifier(Arc<AbstractIdentifierData>);

/// The data contained in an abstract identifier
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct AbstractIdentifierData {
    time: Tid,
    location: AbstractLocation,
    path_hints: Vec<Tid>,
}

impl AbstractIdentifier {
    /// Create a new abstract identifier.
    pub fn new(time: Tid, location: AbstractLocation) -> AbstractIdentifier {
        AbstractIdentifier(Arc::new(AbstractIdentifierData {
            time,
            location,
            path_hints: Vec::new(),
        }))
    }

    /// Create a new abstract identifier where the abstract location is a register.
    /// Panics if the register is a temporary register.
    pub fn from_var(time: Tid, variable: &Variable) -> AbstractIdentifier {
        AbstractIdentifier(Arc::new(AbstractIdentifierData {
            time,
            location: AbstractLocation::from_var(variable).unwrap(),
            path_hints: Vec::new(),
        }))
    }

    /// Create an abstract identifier from a parameter argument.
    ///
    /// If the argument is a sub-register, then the created identifier contains the whole base register.
    pub fn from_arg(time: &Tid, arg: &Arg) -> AbstractIdentifier {
        let location_register = match arg {
            Arg::Register { expr, .. } | Arg::Stack { address: expr, .. } => {
                match &expr.input_vars()[..] {
                    [var] => *var,
                    _ => panic!("Malformed argument expression encountered"),
                }
            }
        };
        let location = match arg {
            Arg::Register { .. } => AbstractLocation::from_var(location_register).unwrap(),
            Arg::Stack { size, .. } => AbstractLocation::from_stack_position(
                location_register,
                arg.eval_stack_offset().unwrap().try_to_i64().unwrap(),
                *size,
            ),
        };
        AbstractIdentifier::new(time.clone(), location)
    }

    /// Create an abstract identifier from an address into global memory.
    pub fn from_global_address(time: &Tid, address: &Bitvector) -> AbstractIdentifier {
        AbstractIdentifier::new(time.clone(), AbstractLocation::from_global_address(address))
    }

    /// Create a new abstract identifier
    /// by pushing the given path hint to the array of path hints of `self`.
    /// Returns an error if the path hint is already contained in the path hints of `self`.
    pub fn with_path_hint(&self, path_hint: Tid) -> Result<Self, Error> {
        if self.path_hints.iter().any(|tid| *tid == path_hint) {
            Err(anyhow!("Path hint already contained."))
        } else {
            let mut new_id = self.clone();
            let inner = Arc::make_mut(&mut new_id.0);
            inner.path_hints.push(path_hint);
            Ok(new_id)
        }
    }

    /// Create a new abstract identifier by removing the last path hint from the path hint array of `self`.
    /// Return the new identifier together with the removed path hint (or none if `self` has no path hints).
    pub fn without_last_path_hint(&self) -> (Self, Option<Tid>) {
        let mut new_id = self.clone();
        let inner = Arc::make_mut(&mut new_id.0);
        let last_path_hint = inner.path_hints.pop();

        (new_id, last_path_hint)
    }

    /// Get the path hints array of `self`.
    pub fn get_path_hints(&self) -> &[Tid] {
        &self.path_hints
    }

    /// Get the register associated to the abstract location.
    /// Panics if the abstract location is not a register but a memory location.
    pub fn unwrap_register(&self) -> &Variable {
        match &self.location {
            AbstractLocation::Register(var) => var,
            AbstractLocation::GlobalAddress { .. }
            | AbstractLocation::GlobalPointer(_, _)
            | AbstractLocation::Pointer(_, _) => panic!("Abstract location is not a register."),
        }
    }

    /// Get the TID representing the time component of the abstract ID.
    pub fn get_tid(&self) -> &Tid {
        &self.time
    }

    /// Get the location component of the abstract ID.
    pub fn get_location(&self) -> &AbstractLocation {
        &self.location
    }

    /// Get the bytesize of the value represented by the abstract ID.
    pub fn bytesize(&self) -> ByteSize {
        self.location.bytesize()
    }

    /// If the abstract location of `self` has a parent location
    /// then return the ID one gets when replacing the abstract location in `self` with its parent location.
    pub fn get_id_with_parent_location(
        &self,
        generic_pointer_size: ByteSize,
    ) -> Option<AbstractIdentifier> {
        if let Ok((parent_location, _)) = self.location.get_parent_location(generic_pointer_size) {
            let id_data = AbstractIdentifierData {
                time: self.deref().time.clone(),
                location: parent_location,
                path_hints: self.deref().path_hints.clone(),
            };
            Some(AbstractIdentifier(Arc::new(id_data)))
        } else {
            None
        }
    }
}

impl std::fmt::Display for AbstractIdentifier {
    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        if self.path_hints.is_empty() {
            write!(formatter, "{} @ {}", self.0.time, self.0.location)
        } else {
            write!(formatter, "{}(", self.0.time)?;
            for hint in &self.0.path_hints {
                write!(formatter, "->{hint}",)?;
            }
            write!(formatter, ") @ {}", self.0.location)
        }
    }
}

#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::variable;

    impl AbstractIdentifier {
        /// Mock an abstract identifier with the given TID name and pointing to the value in the given register name.
        pub fn mock(
            tid: impl ToString,
            register: impl ToString,
            size_in_bytes: u64,
        ) -> AbstractIdentifier {
            AbstractIdentifier::new(
                Tid::new(tid.to_string()),
                AbstractLocation::from_var(&variable!(format!(
                    "{}:{}",
                    register.to_string(),
                    size_in_bytes
                )))
                .unwrap(),
            )
        }

        /// Mock an abstract identifier with the given TID name
        /// and with a nested abstract location starting at the register given by `var`.
        pub fn mock_nested(
            tid: impl ToString,
            var: &str,
            offsets: &[i64],
            size: impl Into<ByteSize>,
        ) -> Self {
            AbstractIdentifier::new(
                Tid::new(tid.to_string()),
                AbstractLocation::mock(var, offsets, size),
            )
        }
    }

    #[test]
    fn test_constraint_enforcements() {
        // Test that no temporary registers are allowed as abstract locations.
        assert!(AbstractLocation::from_var(&Variable {
            name: "var".to_string(),
            size: ByteSize::new(8),
            is_temp: true,
        })
        .is_err());
        // Test uniqueness of TIDs in path hint array.
        let id = AbstractIdentifier::new(
            Tid::new("time_id"),
            AbstractLocation::from_var(&variable!("var:8")).unwrap(),
        );
        let id = id.with_path_hint(Tid::new("first_hint")).unwrap();
        let id = id.with_path_hint(Tid::new("second_hint")).unwrap();
        assert!(id.with_path_hint(Tid::new("first_hint")).is_err());
    }

    #[test]
    fn test_bytesize() {
        let location =
            AbstractLocation::from_stack_position(&variable!("RSP:8"), 10, ByteSize::new(4));
        let id = AbstractIdentifier::new(Tid::new("id"), location);
        assert_eq!(id.bytesize(), ByteSize::new(4));
    }
}