1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
use super::RegisterProperties;
use crate::intermediate_representation::{BinOpType, Blk, Def, Expression, Jmp, Variable};
use crate::prelude::*;
use std::collections::HashMap;
use std::iter::Peekable;
use std::ops::Deref;

/// In the given block replace all occurences of sub-registers by equivalent expressions of the corresponding base register.
/// By getting rid of all mentions of sub-registers we get rid of all hidden dependencies between registers,
/// which simplifies later analyses.
///
/// For example, this replaces mentions of the register EAX by a SUBPIECE expression of RAX for x86-64.
///
/// Note that Ghidra (and thus also the cwe_checker) handles flag registers as independent 1-bit registers
/// instead of combining them to one register containing all flags.
pub fn replace_subregister_in_block(
    block: &mut Term<Blk>,
    register_map: &HashMap<&String, &RegisterProperties>,
) {
    // Substitute subregisters in expressions contained in jump instructions
    for jump in block.term.jmps.iter_mut() {
        replace_subregister_in_jump(jump, register_map);
    }
    // Substitute subregisters in Def instructions
    let new_defs =
        SubregisterSubstitutionBuilder::compute_replacement_defs_for_block(block, register_map);
    block.term.defs = new_defs;
}

/// A builder struct for substitution of subregisters in a basic block.
///
/// For the basic workflow of the subregister substitution process see the
/// [compute_replacement_defs_for_block](SubregisterSubstitutionBuilder::compute_replacement_defs_for_block) method.
struct SubregisterSubstitutionBuilder<'a> {
    /// The register map containing the necessary information which registers are subregisters
    /// and what their base registers are.
    register_map: &'a HashMap<&'a String, &'a RegisterProperties>,
    /// The input iterator keeps track of which Def-terms are still to be substituted.
    input_iter: Peekable<std::slice::Iter<'a, Term<Def>>>,
    /// The output vector contains the already substituted Def-terms (in the correct order).
    output_defs: Vec<Term<Def>>,
}

impl<'a> SubregisterSubstitutionBuilder<'a> {
    /// Initialize a new substitution builder.
    fn new(
        block: &'a Term<Blk>,
        register_map: &'a HashMap<&'a String, &'a RegisterProperties>,
    ) -> Self {
        SubregisterSubstitutionBuilder {
            register_map,
            input_iter: block.term.defs.iter().peekable(),
            output_defs: Vec::new(),
        }
    }

    /// Iterate through all Def-terms of the given block and replace any occurence of a subregister
    /// by its base register (wrapped in SUBPIECE- or PIECE- instructions to not change program semantics).
    ///
    /// The basic workflow of the subregister substitution process for each Def-term is as follows:
    /// - First replace all occurences of subregisters as inputs into expressions of the Def-term
    ///   by replacing them with the SUBPIECE-wrapped base register.
    /// - Replace subregisters as outputs of the Def-term.
    ///   If the Def-term is an assignment and the next Def-term is a cast to the base register (e.g. a zero-extension)
    ///   then combine the two Def-terms to one.
    ///   If the Def-term is a load instruction then replace the output with a temporary register
    ///   and add a second Def-term that assigns the temporary register value to the corresponding bytes of the base register.
    ///   In all other cases one can assign directly to the corresponding bytes of the base register.
    pub fn compute_replacement_defs_for_block(
        block: &'a Term<Blk>,
        register_map: &'a HashMap<&'a String, &'a RegisterProperties>,
    ) -> Vec<Term<Def>> {
        let mut substitution_builder = Self::new(block, register_map);
        while let Some(def) = substitution_builder.input_iter.next() {
            substitution_builder.replace_subregister(def);
        }
        substitution_builder.output_defs
    }

    /// First replace subregisters as input into expressions of the given Def-term.
    /// The replace subregisters as outputs of the Def-term.
    /// The results get added to the `output_defs` array of `self`.
    fn replace_subregister(&mut self, def: &Term<Def>) {
        let mut def = def.clone();
        match &mut def.term {
            Def::Assign {
                var: _,
                value: expr,
            }
            | Def::Load {
                var: _,
                address: expr,
            } => {
                *expr = replace_input_subregister(expr.clone(), self.register_map);
            }
            Def::Store { address, value } => {
                *address = replace_input_subregister(address.clone(), self.register_map);
                *value = replace_input_subregister(value.clone(), self.register_map);
            }
        }
        self.replace_output_subregister(def);
    }

    /// Replace subregisters as output variables of assign- or load-instructions.
    ///
    /// If the next Def-term in the `input_iter` of `self` is a cast to the base register,
    /// then it might get combined with the given Def-term to a single Def-term.
    /// In this case the `input_iter` is advanced by one step.
    ///
    /// For load instructions two Def-terms might get added to the `output_defs` array of `self`.
    fn replace_output_subregister(&mut self, def: Term<Def>) {
        match &def.term {
            Def::Assign { var, value } => {
                // At this point, the code should be in a form where variables
                // being assigned and values that are assigned are of the same
                // size.
                debug_assert_eq!(
                    var.size,
                    value.bytesize(),
                    "Encountered invalid assignment: variable {} has size {} vs. value {} has size {}.",
                    var,
                    var.size,
                    value,
                    value.bytesize()
                );
                if let Some(register) = self.register_map.get(&var.name) {
                    let base_register: &RegisterProperties =
                        self.register_map.get(&register.base_register).unwrap();
                    // Check if the variable being assigned is a subregister.
                    if is_subregister_assignment(var, base_register) {
                        // The register is not a base register and should be replaced.
                        if self.is_next_def_cast_to_base_register(var) {
                            let mut output = self.input_iter.next().unwrap().clone();
                            match &mut output.term {
                                Def::Assign {
                                    var: _var_cast,
                                    value: output_expr,
                                } => {
                                    output_expr.substitute_input_var(var, value);
                                }
                                _ => panic!(),
                            }
                            self.output_defs.push(output);
                            return;
                        } else {
                            let output_var: Variable = base_register.into();
                            let register = into_subregister(register, var);
                            let output_expression =
                                piece_base_register_assignment_expression_together(
                                    value,
                                    base_register,
                                    &register,
                                );
                            self.output_defs.push(Term {
                                tid: def.tid.clone(),
                                term: Def::Assign {
                                    var: output_var,
                                    value: output_expression,
                                },
                            });
                            return;
                        }
                    }
                }
            }
            Def::Load { var, address } => {
                if let Some(register) = self.register_map.get(&var.name) {
                    let base_register: &RegisterProperties =
                        self.register_map.get(&register.base_register).unwrap();
                    if is_subregister_assignment(var, base_register) {
                        // The register is not a base register and should be replaced.
                        // We need two replacement defs: One is a load into a temporary register
                        // and the second is a cast to the base register.
                        let temp_reg = Variable {
                            name: "loaded_value".to_string(),
                            size: var.size,
                            is_temp: true,
                        };
                        self.output_defs.push(Term {
                            tid: def.tid.clone(),
                            term: Def::Load {
                                var: temp_reg.clone(),
                                address: address.clone(),
                            },
                        });
                        if self.is_next_def_cast_to_base_register(var) {
                            let mut cast_to_base_def = self.input_iter.next().unwrap().clone();
                            if let Def::Assign { value, .. } = &mut cast_to_base_def.term {
                                value.substitute_input_var(var, &Expression::Var(temp_reg));
                            } else {
                                panic!()
                            }
                            self.output_defs.push(cast_to_base_def);
                        } else {
                            let register = into_subregister(register, var);
                            self.output_defs.push(Term {
                                tid: def.tid.clone().with_id_suffix("_cast_to_base"),
                                term: Def::Assign {
                                    var: base_register.into(),
                                    value: piece_base_register_assignment_expression_together(
                                        &Expression::Var(temp_reg),
                                        base_register,
                                        &register,
                                    ),
                                },
                            });
                        }
                        return;
                    }
                }
            }
            Def::Store { .. } => (), // No output variable to replace.
        }
        // If we reach this point we did not need to modify the Def
        self.output_defs.push(def);
    }

    /// Return `true` if the next Def-term in the `input_iter` of `self` is a cast of the given variable
    /// to its corresponding base register.
    fn is_next_def_cast_to_base_register(&mut self, input_var: &Variable) -> bool {
        if let Some(peeked_def) = self.input_iter.peek() {
            if let Def::Assign { var, value } = &peeked_def.term {
                if let (Some(reg), Some(input_reg)) = (
                    self.register_map.get(&var.name),
                    self.register_map.get(&input_var.name),
                ) {
                    if let Expression::Cast { arg, .. } = value {
                        match arg.deref() {
                            Expression::Var(cast_var) if cast_var == input_var => {
                                if input_reg.register != input_reg.base_register
                                    && input_reg.base_register == reg.register
                                {
                                    return true;
                                }
                            }
                            _ => (),
                        }
                    }
                }
            }
        }

        false
    }
}

/// Replace subregisters that are inputs into expressions used by the given jump term
/// by SUBPIECE expressions of the corresponding base register.
fn replace_subregister_in_jump(
    jump: &mut Term<Jmp>,
    register_map: &HashMap<&String, &RegisterProperties>,
) {
    match &mut jump.term {
        Jmp::BranchInd(expr)
        | Jmp::CBranch {
            condition: expr, ..
        }
        | Jmp::CallInd { target: expr, .. }
        | Jmp::Return(expr) => {
            *expr = replace_input_subregister(expr.clone(), register_map);
        }
        Jmp::Branch(_) | Jmp::Call { .. } | Jmp::CallOther { .. } => (),
    }
}

/// Replace input subregisters of the given expression by SUBPIECEs of the corresponding base register.
/// Return the resulting expression.
pub fn replace_input_subregister(
    mut expression: Expression,
    register_map: &HashMap<&String, &RegisterProperties>,
) -> Expression {
    let mut replacement_pairs = Vec::new();
    for var in expression.input_vars() {
        if let Some(register) = register_map.get(&var.name) {
            if var.name != register.base_register || var.size < register.size {
                // The register is not a base register and should be replaced.
                let target_size = var.size;
                let replacement_expr = create_subpiece_from_sub_register(
                    register.base_register.clone(),
                    target_size,
                    register.lsb,
                    register_map,
                );
                replacement_pairs.push((var.clone(), replacement_expr));
            }
        }
    }
    for (var, replacement_expr) in replacement_pairs {
        expression.substitute_input_var(&var, &replacement_expr);
    }
    expression
}

/// This function creates a SUBPIECE expression
/// from a subregister containing the corresponding base register.
fn create_subpiece_from_sub_register(
    base: String,
    size: ByteSize,
    lsb: ByteSize,
    register_map: &HashMap<&String, &RegisterProperties>,
) -> Expression {
    Expression::Subpiece {
        low_byte: lsb,
        size,
        arg: Box::new(Expression::Var(Variable {
            name: base.clone(),
            size: register_map.get(&base).unwrap().size,
            is_temp: false,
        })),
    }
}

/// Consider an assignment of the form `sub-register = input_expression`.
/// Then this function pieces together an assignment expression for the base register
/// out of the input expression and those parts of the base register
/// that are not part of the sub-register (i.e. that are not overwritten by the sub-register assignment).
fn piece_base_register_assignment_expression_together(
    input_expression: &Expression,
    output_base_register: &RegisterProperties,
    sub_register: &RegisterProperties,
) -> Expression {
    let base_size: ByteSize = output_base_register.size;
    let base_name: &String = &output_base_register.register;
    let sub_size: ByteSize = sub_register.size;
    let sub_lsb: ByteSize = sub_register.lsb;

    let base_subpiece = Box::new(Expression::Var(Variable {
        name: base_name.clone(),
        size: base_size,
        is_temp: false,
    }));

    if sub_register.lsb > ByteSize::new(0) && sub_register.lsb + sub_register.size == base_size {
        // Build PIECE as PIECE(lhs: sub_register, rhs: low subpiece)
        Expression::BinOp {
            op: BinOpType::Piece,
            lhs: Box::new(input_expression.clone()),
            rhs: Box::new(Expression::Subpiece {
                low_byte: ByteSize::new(0),
                size: sub_lsb,
                arg: base_subpiece,
            }),
        }
    } else if sub_register.lsb > ByteSize::new(0) {
        // Build PIECE as PIECE(lhs:PIECE(lhs:higher subpiece, rhs:sub register), rhs:lower subpiece)
        Expression::BinOp {
            op: BinOpType::Piece,
            lhs: Box::new(Expression::BinOp {
                op: BinOpType::Piece,
                lhs: Box::new(Expression::Subpiece {
                    low_byte: sub_lsb + sub_size,
                    size: base_size - (sub_lsb + sub_size),
                    arg: base_subpiece.clone(),
                }),
                rhs: Box::new(input_expression.clone()),
            }),
            rhs: Box::new(Expression::Subpiece {
                low_byte: ByteSize::new(0),
                size: sub_lsb,
                arg: base_subpiece,
            }),
        }
    } else {
        // Build PIECE as PIECE(lhs: high subpiece, rhs: sub register)
        Expression::BinOp {
            op: BinOpType::Piece,
            lhs: Box::new(Expression::Subpiece {
                low_byte: sub_size,
                size: base_size - sub_size,
                arg: base_subpiece,
            }),
            rhs: Box::new(input_expression.clone()),
        }
    }
}

/// Helper to check wheter an assignment to `var` is a subregister assignment.
///
/// Assumes that `var` is held in a register and that `base_register` is the
/// corresponding full-size register.
fn is_subregister_assignment(var: &Variable, base_register: &RegisterProperties) -> bool {
    var.name != base_register.register || var.size < base_register.size
}

/// Helper to create a subregister based on a variable.
///
/// Assumes that `var` is held in a register whose full-size version is equal
/// to, or a superregister of, `register`.
fn into_subregister(register: &RegisterProperties, var: &Variable) -> RegisterProperties {
    // Background: If a subregister being assigned has the same
    //   name as the full-size register (but a different size), `register`
    //   will be the full register at this point. Thus, we correct the
    //   size manually and create a register with the full-size name but a
    //   smaller size. This should be a no-op if `register` is already the
    //   correct subregister.
    // FIXME: This is incomplete if the LSB of the subregister and the full
    // register are different. Until now this has not been observed as in those
    // cases the register names are different.
    let mut register = register.clone();
    register.size = var.size;

    register
}

#[cfg(test)]
mod tests;