Skip to content

Commit

Permalink
Analyze unsafe code reachability
Browse files Browse the repository at this point in the history
Add callgraph analysis to scanner in order to find the distance between
functions in a crate and unsafe functions.

For that, we build the crate call graph and collect the unsafe
functions. After that, do reverse BFS traversal from the unsafe
functions and store the distance to other functions.
The result is stored in a new csv file.
  • Loading branch information
celinval committed Sep 24, 2024
1 parent cda1b30 commit df8fc1b
Show file tree
Hide file tree
Showing 10 changed files with 1,513 additions and 55 deletions.
1,320 changes: 1,294 additions & 26 deletions Cargo.lock

Large diffs are not rendered by default.

24 changes: 16 additions & 8 deletions docs/src/rustc-hacks.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,19 +52,27 @@ Note that pretty printing for the Rust nightly toolchain (which Kani uses) is no
For example, a vector may be displayed as `vec![{...}, {...}]` on nightly Rust, when it would be displayed as `vec![Some(0), None]` on stable Rust.
Hopefully, this will be fixed soon.

### CLion / IntelliJ
### RustRover / CLion
This is not a great solution, but it works for now (see <https://github.com/intellij-rust/intellij-rust/issues/1618>
for more details).
Edit the `Cargo.toml` of the package that you're working on and add artificial dependencies on the `rustc` packages that you would like to explore.

Open the `Cargo.toml` of your crate (e.g.: `kani-compiler`), and do the following:

1. Add optional dependencies on the `rustc` crates you are using.
2. Add a feature that enable those dependencies.
3. Toggle that feature using the IDE GUI.

Here is an example:

```toml
# This configuration doesn't exist so it shouldn't affect your build.
[target.'cfg(KANI_DEV)'.dependencies]
# ** At the bottom of the dependencies section: **
# Adjust the path here to point to a local copy of the rust compiler.
# The best way is to use the rustup path. Replace <toolchain> with the
# proper name to your toolchain.
rustc_driver = { path = "~/.rustup/toolchains/<toolchain>/lib/rustlib/rustc-src/rust/compiler/rustc_driver" }
rustc_interface = { path = "~/.rustup/toolchains/<toolchain>/lib/rustlib/rustc-src/rust/compiler/rustc_interface" }
# E.g.: ~/.rustup/toolchains/<toolchain>/lib/rustlib/rustc-src/rust/compiler
rustc_smir = { path = "<path_to_rustc>/rustc_smir", optional = true }
stable_mir = { path = "<path_to_rustc>/stable_mir", optional = true }

[features]
clion = ['rustc_smir', 'stable_mir']
```

**Don't forget to rollback the changes before you create your PR.**
Expand Down
3 changes: 2 additions & 1 deletion scripts/std-analysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ export RUSTC_LOG=error
RUST_FLAGS=(
"-Cpanic=abort"
"-Zalways-encode-mir"
"--emit=mir"
)
export RUSTFLAGS="${RUST_FLAGS[@]}"
export RUSTC="$KANI_DIR/target/debug/scan"
Expand Down Expand Up @@ -104,7 +105,7 @@ for f in $results/*overall.csv; do
fname=$(basename $f)
crate=${fname%_scan_overall.csv}
echo -n "$crate," >> $summary
tr -d [:alpha:]_,; < $f | tr -s '\n' ',' \
tr -d "[:alpha:]_,;" < $f | tr -s '\n' ',' \
>> $summary
echo "" >> $summary
done
Expand Down
4 changes: 2 additions & 2 deletions tests/script-based-pre/tool-scanner/scanner-test.expected
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
5 test_scan_fn_loops.csv
19 test_scan_functions.csv
20 test_scan_functions.csv
5 test_scan_input_tys.csv
16 test_scan_overall.csv
3 test_scan_recursion.csv
5 test_scan_unsafe_ops.csv
6 test_scan_unsafe_ops.csv
2 changes: 1 addition & 1 deletion tests/script-based-pre/tool-scanner/scanner-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ cargo run -p scanner test.rs --crate-type lib
wc -l *csv

popd
rm -rf ${OUT_DIR}
#rm -rf ${OUT_DIR}
13 changes: 13 additions & 0 deletions tests/script-based-pre/tool-scanner/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ pub fn generic<T: Default>() -> T {
T::default()
}

pub fn blah() {
ok();
assert_eq!(u8::default(), 0);
}

pub struct RecursiveType {
pub inner: Option<*const RecursiveType>,
}
Expand Down Expand Up @@ -102,3 +107,11 @@ pub fn start_recursion() {
pub fn not_recursive() {
let _ = ok();
}

extern "C" {
fn external_function();
}

pub fn call_external() {
unsafe { external_function() };
}
8 changes: 7 additions & 1 deletion tools/scanner/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,14 @@ strum_macros = "0.26"
petgraph = "0.6.5"
graph-cycles = "0.1.0"

# Enable clion code analysis on rustc for kani-compiler
rustc_smir = { path = "../../../rustc_link/rustc_smir", optional = true }
stable_mir = { path = "../../../rustc_link/stable_mir", optional = true }

[features]
clion = ['rustc_smir', 'stable_mir']

[package.metadata.rust-analyzer]
# This crate uses rustc crates.
# More info: https://github.com/rust-analyzer/rust-analyzer/pull/7891
rustc_private = true

84 changes: 69 additions & 15 deletions tools/scanner/src/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ use serde::{ser::SerializeStruct, Serialize, Serializer};
use stable_mir::mir::mono::Instance;
use stable_mir::mir::visit::{Location, PlaceContext, PlaceRef};
use stable_mir::mir::{
BasicBlock, Body, MirVisitor, Mutability, ProjectionElem, Safety, Terminator, TerminatorKind,
BasicBlock, Body, CastKind, MirVisitor, Mutability, NonDivergingIntrinsic, ProjectionElem,
Rvalue, Safety, Statement, StatementKind, Terminator, TerminatorKind,
};
use stable_mir::ty::{AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind};
use stable_mir::ty::{Abi, AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind};
use stable_mir::visitor::{Visitable, Visitor};
use stable_mir::{CrateDef, CrateItem};
use std::collections::{HashMap, HashSet};
Expand All @@ -23,7 +24,7 @@ use std::path::{Path, PathBuf};
#[derive(Clone, Debug)]
pub struct OverallStats {
/// The key and value of each counter.
counters: Vec<(&'static str, usize)>,
pub counters: Vec<(&'static str, usize)>,
/// TODO: Group stats per function.
fn_stats: HashMap<CrateItem, FnStats>,
}
Expand All @@ -35,6 +36,12 @@ struct FnStats {
has_unsafe_ops: Option<bool>,
has_unsupported_input: Option<bool>,
has_loop: Option<bool>,
/// How many degrees of separation to unsafe code if any?
/// - `None` if this function is indeed safe.
/// - 0 if this function contains unsafe code (including invoking unsafe fns).
/// - 1 if this function calls a safe abstraction.
/// - 2+ if this function calls other functions that call safe abstractions.
unsafe_distance: Option<usize>,
}

impl FnStats {
Expand All @@ -46,6 +53,7 @@ impl FnStats {
has_unsupported_input: None,
// TODO: Implement this.
has_loop: None,
unsafe_distance: None,
}
}
}
Expand Down Expand Up @@ -230,24 +238,24 @@ impl OverallStats {

macro_rules! fn_props {
($(#[$attr:meta])*
struct $name:ident {
$vis:vis struct $name:ident {
$(
$(#[$prop_attr:meta])*
$prop:ident,
)+
}) => {
#[derive(Debug)]
struct $name {
$vis struct $name {
fn_name: String,
$($(#[$prop_attr])* $prop: usize,)+
}

impl $name {
const fn num_props() -> usize {
pub const fn num_props() -> usize {
[$(stringify!($prop),)+].len()
}

fn new(fn_name: String) -> Self {
pub fn new(fn_name: String) -> Self {
Self { fn_name, $($prop: 0,)+}
}
}
Expand Down Expand Up @@ -367,7 +375,7 @@ impl<'a> Visitor for TypeVisitor<'a> {
}
}

fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
pub(crate) fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
out_path.set_extension("csv");
info(format!("Write file: {out_path:?}"));
let mut writer = WriterBuilder::new().delimiter(b';').from_path(&out_path).unwrap();
Expand All @@ -377,17 +385,23 @@ fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
}

fn_props! {
struct FnUnsafeOperations {
pub struct FnUnsafeOperations {
inline_assembly,
/// Dereference a raw pointer.
/// This is also counted when we access a static variable since it gets translated to a raw pointer.
unsafe_dereference,
/// Call an unsafe function or method.
/// Call an unsafe function or method including C-FFI.
unsafe_call,
/// Access or modify a mutable static variable.
unsafe_static_access,
/// Access fields of unions.
unsafe_union_access,
/// Invoke external functions (this is a subset of `unsafe_call`.
extern_call,
/// Transmute operations.
transmute,
/// Cast raw pointer to reference.
unsafe_cast,
}
}

Expand Down Expand Up @@ -417,9 +431,21 @@ impl<'a> MirVisitor for BodyVisitor<'a> {
fn visit_terminator(&mut self, term: &Terminator, location: Location) {
match &term.kind {
TerminatorKind::Call { func, .. } => {
let fn_sig = func.ty(self.body.locals()).unwrap().kind().fn_sig().unwrap();
if fn_sig.value.safety == Safety::Unsafe {
let TyKind::RigidTy(RigidTy::FnDef(fn_def, _)) =
func.ty(self.body.locals()).unwrap().kind()
else {
return self.super_terminator(term, location);
};
let fn_sig = fn_def.fn_sig().skip_binder();
if fn_sig.safety == Safety::Unsafe {
self.props.unsafe_call += 1;
if !matches!(
fn_sig.abi,
Abi::Rust | Abi::RustCold | Abi::RustCall | Abi::RustIntrinsic
) && !fn_def.has_body()
{
self.props.extern_call += 1;
}
}
}
TerminatorKind::InlineAsm { .. } => self.props.inline_assembly += 1,
Expand All @@ -428,6 +454,34 @@ impl<'a> MirVisitor for BodyVisitor<'a> {
self.super_terminator(term, location)
}

fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) {
if let Rvalue::Cast(cast_kind, operand, ty) = rvalue {
match cast_kind {
CastKind::Transmute => {
self.props.transmute += 1;
}
_ => {
let operand_ty = operand.ty(self.body.locals()).unwrap();
if ty.kind().is_ref() && operand_ty.kind().is_raw_ptr() {
self.props.unsafe_cast += 1;
}
}
}
};
self.super_rvalue(rvalue, location);
}

fn visit_statement(&mut self, stmt: &Statement, location: Location) {
if matches!(
&stmt.kind,
StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(_))
) {
// Treat this as invoking the copy intrinsic.
self.props.unsafe_call += 1;
}
self.super_statement(stmt, location)
}

fn visit_projection_elem(
&mut self,
place: PlaceRef,
Expand Down Expand Up @@ -672,9 +726,9 @@ impl Recursion {
}
}

struct FnCallVisitor<'a> {
body: &'a Body,
fns: Vec<FnDef>,
pub struct FnCallVisitor<'a> {
pub body: &'a Body,
pub fns: Vec<FnDef>,
}

impl<'a> MirVisitor for FnCallVisitor<'a> {
Expand Down
101 changes: 101 additions & 0 deletions tools/scanner/src/call_graph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Provide different static analysis to be performed in the call graph

use crate::analysis::{FnCallVisitor, FnUnsafeOperations, OverallStats};
use stable_mir::mir::{MirVisitor, Safety};
use stable_mir::ty::{FnDef, RigidTy, Ty, TyKind};
use stable_mir::{CrateDef, CrateDefType};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, VecDeque};
use std::hash::{Hash, Hasher};
use std::path::PathBuf;

impl OverallStats {
/// Iterate over all functions defined in this crate and log any unsafe operation.
pub fn unsafe_distance(&mut self, filename: PathBuf) {
let all_items = stable_mir::all_local_items();
let mut queue =
all_items.into_iter().filter_map(|item| Node::try_new(item.ty())).collect::<Vec<_>>();
// Build call graph
let mut call_graph = CallGraph::default();
while let Some(node) = queue.pop() {
if let Entry::Vacant(e) = call_graph.nodes.entry(node.def) {
e.insert(node);
let Some(body) = node.def.body() else {
continue;
};
let mut visitor = FnCallVisitor { body: &body, fns: vec![] };
visitor.visit_body(&body);
queue.extend(visitor.fns.iter().map(|def| Node::try_new(def.ty()).unwrap()));
for callee in &visitor.fns {
call_graph.rev_edges.entry(*callee).or_default().push(node.def)
}
call_graph.edges.insert(node.def, visitor.fns);
}
}

// Calculate the distance between unsafe functions and functions with unsafe operation.
let mut queue = call_graph
.nodes
.values()
.filter_map(|node| node.has_unsafe.then_some((node.def, 0)))
.collect::<VecDeque<_>>();
let mut visited: HashMap<FnDef, u16> = HashMap::from_iter(queue.iter().cloned());
while let Some(current) = queue.pop_front() {
for caller in call_graph.rev_edges.entry(current.0).or_default() {
if !visited.contains_key(caller) {
let distance = current.1 + 1;
visited.insert(*caller, distance);
queue.push_back((*caller, distance))
}
}
}
let krate = stable_mir::local_crate();
let transitive_unsafe = visited
.into_iter()
.filter_map(|(def, distance)| (def.krate() == krate).then_some((def.name(), distance)))
.collect::<Vec<_>>();
self.counters.push(("transitive_unsafe", transitive_unsafe.len()));
crate::analysis::dump_csv(filename, &transitive_unsafe);
}
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct Node {
def: FnDef,
is_unsafe: bool,
has_unsafe: bool,
}

impl Node {
fn try_new(ty: Ty) -> Option<Node> {
let kind = ty.kind();
let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind else {
return None;
};
let has_unsafe = if let Some(body) = def.body() {
let unsafe_ops = FnUnsafeOperations::new(def.name()).collect(&body);
unsafe_ops.has_unsafe()
} else {
true
};
let fn_sig = kind.fn_sig().unwrap();
let is_unsafe = fn_sig.skip_binder().safety == Safety::Unsafe;
Some(Node { def, is_unsafe, has_unsafe })
}
}

impl Hash for Node {
fn hash<H: Hasher>(&self, state: &mut H) {
self.def.hash(state)
}
}

#[derive(Default, Debug)]
struct CallGraph {
nodes: HashMap<FnDef, Node>,
edges: HashMap<FnDef, Vec<FnDef>>,
rev_edges: HashMap<FnDef, Vec<FnDef>>,
}
Loading

0 comments on commit df8fc1b

Please sign in to comment.