1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331
//! Definition of the Taint Analysis for CWE252.
//!
//! Implementation of the [`TaintAnalysis`] trait for this CWE check. See the
//! module documentation for more details on the algorithm and its limitations.
use super::MustUseCall;
use crate::analysis::fixpoint;
use crate::analysis::forward_interprocedural_fixpoint::{
self, create_computation as fwd_fp_create_computation,
};
use crate::analysis::graph::{Graph as Cfg, HasCfg};
use crate::analysis::interprocedural_fixpoint_generic::NodeValue;
use crate::analysis::pointer_inference::{Data as PiData, PointerInference};
use crate::analysis::taint::state::State as TaState;
use crate::analysis::taint::TaintAnalysis;
use crate::analysis::vsa_results::{HasVsaResult, VsaResult};
use crate::intermediate_representation::{Blk, ExternSymbol, Jmp, Project, Term, Tid};
use crate::utils::debug::ToJsonCompact;
use crate::utils::log::CweWarning;
use std::convert::AsRef;
/// Type of the fixpoint computation of the taint analysis.
pub type FpComputation<'a, 'b> = fixpoint::Computation<
forward_interprocedural_fixpoint::GeneralizedContext<'a, TaComputationContext<'a, 'b>>,
>;
impl ToJsonCompact for FpComputation<'_, '_> {
fn to_json_compact(&self) -> serde_json::Value {
let graph = self.get_graph();
let mut json_nodes = serde_json::Map::new();
for (node_index, node_value) in self.node_values().iter() {
let node = graph.node_weight(*node_index).unwrap();
if let NodeValue::Value(value) = node_value {
json_nodes.insert(format!("{node}"), value.to_json_compact());
}
}
serde_json::Value::Object(json_nodes)
}
}
/// Type that represents the definition of the taint analysis.
///
/// Values of this type represent the taint analysis for a particular call to an
/// external function.
pub struct TaComputationContext<'a, 'b: 'a> {
/// Extern function call that is analyzed.
call: MustUseCall<'a>,
project: &'a Project,
pi_result: &'a PointerInference<'b>,
/// Used to send generated CWE warnings to the collector.
cwe_sender: crossbeam_channel::Sender<CweWarning>,
}
impl<'a, 'b: 'a> TaComputationContext<'a, 'b> {
/// Creates a new taint analysis context for the given call to an external
/// function.
pub(super) fn new(
call: MustUseCall<'a>,
project: &'a Project,
pi_result: &'a PointerInference<'b>,
cwe_sender: &'a crossbeam_channel::Sender<CweWarning>,
) -> Self {
Self {
call,
project,
pi_result,
cwe_sender: cwe_sender.clone(),
}
}
/// Converts the taint analysis context into a fixpoint computation.
///
/// The returned computation can be solved to analyze this particular
/// function call.
pub fn into_computation(self) -> FpComputation<'a, 'b> {
let symbol = self.call.symbol;
let vsa_result = self.vsa_result();
let return_node = self.call.return_node;
let node_value = NodeValue::Value(TaState::new_return(symbol, vsa_result, return_node));
let mut computation = fwd_fp_create_computation(self, None);
computation.set_node_value(return_node, node_value);
computation
}
fn generate_cwe_warning(&self, warning_location: &Tid, reason: &str) {
super::generate_cwe_warning(&self.cwe_sender, &self.call, warning_location, reason)
}
}
impl<'a> HasCfg<'a> for TaComputationContext<'a, '_> {
fn get_cfg(&self) -> &Cfg<'a> {
self.pi_result.get_graph()
}
}
impl HasVsaResult<PiData> for TaComputationContext<'_, '_> {
fn vsa_result(&self) -> &impl VsaResult<ValueDomain = PiData> {
self.pi_result
}
}
impl AsRef<Project> for TaComputationContext<'_, '_> {
fn as_ref(&self) -> &Project {
self.project
}
}
impl<'a> TaintAnalysis<'a> for TaComputationContext<'a, '_> {
/// Generates a CWE warning when a transition function returns the empty
/// state.
///
/// If a transition function returns the empty state this implies that there
/// is some time in the program where all information about the return value
/// of a fallible call has been eradicated without a previous check.
///
/// From this time onwards, the program cannot possibly know if the fallible
/// operation has succeeded or not. This point is reported as a possible bug
/// by the check.
///
/// For an example where this rule is needed to detect an error consider the
/// following program:
///
/// ```c
/// x = fallible_call();
///
/// if (some_unrelated_condition) {
/// x = 42;
/// }
///
/// if (x < 0) {
/// return x;
/// }
///
/// // Do something that assumes that `fallible_call` has worked.
/// ```
///
/// A pure forward 'may' data flow analysis would only see a tainted `x`
/// being used in a comparison and consider the fallible call to be checked.
/// With this condition, the analysis would emit a warning for the term
/// `x = 42` since at this point all information about the return value is
/// lost.
fn handle_empty_state_out(&self, tid: &Tid) -> Option<TaState> {
self.generate_cwe_warning(tid, "empty_state");
None
}
/// Update taint state on call to extern function.
///
/// We almost always just want to remove the taint from non-callee-saved
/// registers. However, for calls/jumps into nonreturning functions at the
/// end of a procedure we need some logic to suppress false positives.
fn update_extern_call(
&self,
state: &TaState,
_call: &Term<Jmp>,
project: &Project,
extern_symbol: &ExternSymbol,
) -> Option<TaState> {
// External symbols that are unconditional termination points. It does
// not make sense to propagate taint through those so we always return
// `None`.
if extern_symbol.no_return {
return None;
}
// External symbols that are effectively no-ops. Here we always return
// the unmodified input state.
//
// FIXME: This is an artifact of the way in which we generate the CFG.
// On x86 and amd64 kernel functions end in a 'jmp retpoline' but we
// generate a 'call extern; return;', where we introduced an artificial
// isolated return node. Another workaround would be to replace the jmp
// with a ret in a normalization pass.
const EXTERN_NOOP: [&str; 1] = ["__x86_return_thunk"];
let mut new_state = state.clone();
if !EXTERN_NOOP.iter().any(|s| s == &extern_symbol.name) {
new_state.remove_non_callee_saved_taint(project.get_calling_convention(extern_symbol));
}
Some(new_state)
}
/// Stops taint propagation if jump depends on a tainted condition.
///
/// We assume that any check that depends on tainted values is a check of
/// the return value of the fallible function, and that the program handles
/// all outcomes correctly.
///
/// A jump can depend on a tainted condition in two ways, either it is
/// executed because the condition evaluated to `true`, or because it
/// evaluated to `false`, both cases must be handled here.
fn update_jump(
&self,
state: &TaState,
jump: &Term<Jmp>,
untaken_conditional: Option<&Term<Jmp>>,
_target: &Term<Blk>,
) -> Option<TaState> {
// If this control flow transfer depends on a condition involving
// a tainted value then we do not propagate any taint information to
// the destination.
match (&jump.term, untaken_conditional) {
// Directly depends on a tainted value.
(Jmp::CBranch { condition, .. }, _) if state.eval(condition).is_tainted() => None,
// Branch is only taken because a condition based on a tainted value
// evaluated to false.
(
_,
Some(Term {
tid: _,
term: Jmp::CBranch { condition, .. },
}),
) if state.eval(condition).is_tainted() => None,
// Does not depend on tainted values.
_ => {
if state.is_empty() {
self.handle_empty_state_out(&jump.tid)
} else {
Some(state.clone())
}
}
}
}
/// Propagates taint from callee to caller.
///
/// The check performs a bottom-up interprocedural taint analysis. The main
/// idea is that: *If a function may propagate the return value of a
/// fallible function call to its caller without checking it, then the
/// function itself becomes "must check".* (In the sense that the caller is
/// now responsible for checking its return value.)
///
/// The taint may be returned directly, indirectly by returning a pointer
/// that can be used to reach taint, or written to global variables
/// (possibly as a pointer).
///
/// Limitations stem from two sources:
///
/// - The calling convention may be unknown, which means we can not
/// determine precisely which taint will be reachable for the caller.
///
/// See the source code comments for further information.
///
/// Furthermore, this callback is responsible for detecting cases where
/// taint may reach the end of a function without being returned.
/// This implies that the called function may have ignored the return
/// value of a fallible function call and there is no way for the caller to
/// know the outcome. The check raises a warning in those cases.
fn update_return_callee(
&self,
state: &TaState,
call_term: &Term<Jmp>,
return_term: &Term<Jmp>,
calling_convention: &Option<String>,
) -> Option<TaState> {
let (mut propagated_register_taint, mut propagated_memory_taint) =
state.clone().into_mem_reg_taint();
// Only keep memory taint that will be propagated to the caller. We
// compute this here since we want to notice when no taint is
// propagated.
let renaming_map = self.pi_result.get_call_renaming_map(&call_term.tid);
propagated_memory_taint.retain(|aid, _| {
// This is still an over-approximation to the taint that will be
// available to the caller since it might happen that all relative
// values have non-exactly-known offsets.
renaming_map.is_some_and(|renaming_map| {
renaming_map
.get(aid)
.is_some_and(|value| value.referenced_ids().next().is_some())
})
});
if let Some(calling_convention) = self
.project
.get_specific_calling_convention(calling_convention)
{
let return_registers = calling_convention.get_all_return_register();
// If there are tainted return registers we propagate the taint to
// the caller, which makes them responsible for checking it.
propagated_register_taint
.retain(|reg, taint| return_registers.contains(®) && taint.is_tainted());
} else {
// We have tainted registers but we do not know the calling
// convention. Here we simply return the complete register taint
// of the callee to the caller. This heuristic should in practice
// hopefully be a good approximation to the real calling convention:
// - It is an over approximation so return registers will be
// propagated correctly.
// - There is a chance that callee-saved registers have been
// overwritten with their saved values by the function epilog and
// are thus not tainted.
// - There is a chance that caller saved registers will be restored
// by the caller such that the taint is immediatly eliminated and
// we catch cases where the called function has ignored tainted
// values.
};
let propagated_state =
TaState::from_mem_reg_taint(propagated_register_taint, propagated_memory_taint);
if propagated_state.is_empty() {
// If we can not propagate any taint to the caller it is implied
// that
//
// - the return value of a fallible function call may have been
// ignored by the callee
// - AND the result is not returned to the caller
//
// Thus, callers of this function have no way to know if the
// operation it performs was successful and the function itself
// might not have checked it either.
self.generate_cwe_warning(&return_term.tid, "return_no_taint");
None
} else {
Some(propagated_state)
}
}
}