Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add analysis of unsafe code in dependencies through cargo geiger #72

Merged
merged 7 commits into from
Jun 22, 2021
223 changes: 215 additions & 8 deletions analysis/src/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use anyhow::{anyhow, Result};
use camino::Utf8Path;
use guppy::graph::{DependencyDirection, PackageGraph, PackageMetadata};
use serde::{Deserialize, Serialize};
use std::{cell::RefCell, collections::HashMap, iter};
use std::{cell::RefCell, collections::HashMap, fs, iter, path::PathBuf, process::Command};
use tokei::{Config, LanguageType, Languages};

#[derive(Debug, Clone)]
Expand All @@ -13,6 +13,7 @@ pub struct CodeReport {
pub version: String,
pub is_direct: bool,
pub loc_report: Option<LOCReport>,
pub unsafe_report: Option<UnsafeReport>,
pub dep_report: Option<DepReport>,
}

Expand All @@ -27,22 +28,97 @@ pub struct DepReport {
pub total_deps: u64,
pub deps_total_loc: u64,
pub deps_rust_loc: u64,
pub deps_forbidding_unsafe: u64,
pub deps_using_unsafe: u64,
}

#[derive(Serialize, Deserialize, Debug, Default, Clone)]
pub struct UnsafeReport {
// Unsafe code used by the cargo geiger
pub forbids_unsafe: bool,
pub unsafe_count: UsedUnsafeDetails,
}

#[derive(Serialize, Deserialize, Debug, Default, Clone)]
pub struct UsedUnsafeDetails {
pub functions: u64,
pub expressions: u64,
pub impls: u64,
pub traits: u64,
pub methods: u64,
}

pub struct CodeAnalyzer {
cache: RefCell<HashMap<String, LOCReport>>,
loc_cache: RefCell<HashMap<String, LOCReport>>,
geiger_cache: RefCell<HashMap<(String, String), GeigerPackageInfo>>,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct GeigerReport {
pub packages: Vec<GeigerPackageInfo>,
pub used_but_not_scanned_files: Vec<String>,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct GeigerPackageInfo {
pub package: GeigerPackage,
pub unsafety: Unsafety,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct GeigerPackage {
pub id: GeigerPackageId,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct GeigerPackageId {
pub name: String,
pub version: String,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Unsafety {
pub used: UnsafeInfo,
pub unused: UnsafeInfo,
pub forbids_unsafe: bool,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct UnsafeInfo {
pub functions: UnsafeCount,
pub exprs: UnsafeCount,
pub item_impls: UnsafeCount,
pub item_traits: UnsafeCount,
pub methods: UnsafeCount,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct UnsafeCount {
pub safe: u64,
pub unsafe_: u64,
}

impl CodeAnalyzer {
pub fn new() -> Self {
Self {
cache: RefCell::new(HashMap::new()),
loc_cache: RefCell::new(HashMap::new()),
geiger_cache: RefCell::new(HashMap::new()),
}
}

pub fn analyze_code(self, graph: &PackageGraph) -> Result<Vec<CodeReport>> {
let mut code_reports: Vec<CodeReport> = Vec::new();

// Get path to all packages in the workspace
let package_paths: Vec<&str> = graph
.workspace()
.iter()
.map(|pkg| pkg.manifest_path().as_str())
.collect();
// Run Geiger report for each member packages and store result in cache
// TODO: How to avoid multiple calls for Cargo Geiger and run only once?
self.get_cargo_geiger_report_for_workspace(package_paths)?;

// Get direct dependencies
let direct_dependencies: Vec<PackageMetadata> = graph
.query_workspace()
Expand All @@ -56,6 +132,9 @@ impl CodeAnalyzer {

for package in &direct_dependencies {
let loc_report = self.get_loc_report(package.manifest_path())?;
let unsafe_report =
self.get_unsafe_report(package.name().to_string(), package.version().to_string())?;

let dependencies: Vec<PackageMetadata> = graph
.query_forward(iter::once(package.id()))?
.resolve()
Expand All @@ -69,6 +148,7 @@ impl CodeAnalyzer {
version: package.version().to_string(),
is_direct: true,
loc_report: Some(loc_report),
unsafe_report: unsafe_report,
dep_report: Some(dep_report),
};

Expand All @@ -82,17 +162,34 @@ impl CodeAnalyzer {
let total_deps = dependencies.len() as u64;
let mut deps_total_loc = 0;
let mut deps_rust_loc = 0;
let mut deps_forbidding_unsafe = 0;
let mut deps_using_unsafe = 0;

for package in &dependencies {
let loc_report = self.get_loc_report(package.manifest_path())?;
deps_total_loc += loc_report.total_loc;
deps_rust_loc += loc_report.rust_loc;

let unsafe_report =
self.get_unsafe_report(package.name().to_string(), package.version().to_string())?;
if !unsafe_report.is_none() {
let unsafe_report = unsafe_report.unwrap();
if unsafe_report.forbids_unsafe {
deps_forbidding_unsafe += 1;
} else {
if unsafe_report.unsafe_count.expressions > 0 {
deps_using_unsafe += 1;
}
}
}
}

Ok(DepReport {
total_deps,
deps_total_loc,
deps_rust_loc,
deps_forbidding_unsafe,
deps_using_unsafe,
})
}

Expand All @@ -104,9 +201,9 @@ impl CodeAnalyzer {
)
})?;

if self.cache.borrow().contains_key(manifest_path.as_str()) {
if self.loc_cache.borrow().contains_key(manifest_path.as_str()) {
let code_report = self
.cache
.loc_cache
.borrow()
.get(manifest_path.as_str())
.ok_or_else(|| anyhow!("Caching error"))?
Expand All @@ -133,19 +230,96 @@ impl CodeAnalyzer {
.unwrap_or(0),
};

// put in cache
self.cache
// put in loc_cache
self.loc_cache
.borrow_mut()
.insert(manifest_path.to_string(), loc_report);

let code_report = self
.cache
.loc_cache
.borrow()
.get(manifest_path.as_str())
.ok_or_else(|| anyhow!("Caching error"))?
.clone();
Ok(code_report)
}

fn get_cargo_geiger_report_for_workspace(&self, package_paths: Vec<&str>) -> Result<()> {
// Cargo geiger only works with package tomls
// and not a virtual manifest file
// Therefore, we run cargo geiger on all member packages
// TODO: Revisit this design
let package_paths: Vec<PathBuf> = package_paths
.iter()
.map(|path| PathBuf::from(path))
.collect();

for path in &package_paths {
let geiger_report = Self::get_cargo_geiger_report(path)?;
let geiger_packages = geiger_report.packages;
for geiger_package in &geiger_packages {
let package = &geiger_package.package.id;
let key = (package.name.clone(), package.version.clone());
if !self.geiger_cache.borrow().contains_key(&key) {
self.geiger_cache
.borrow_mut()
.insert(key, geiger_package.clone());
}
}
}

Ok(())
}

fn get_cargo_geiger_report(absolute_path: &PathBuf) -> Result<GeigerReport> {
let absolute_path = fs::canonicalize(absolute_path)?;
let absolute_path = absolute_path
.to_str()
.ok_or_else(|| anyhow!("error in parsing absolute path for Cargo.toml"))?;

let output = Command::new("cargo")
.args(&[
"geiger",
"--output-format",
"Json",
"--manifest-path",
absolute_path, // only accepts absolute path
])
.output()?;

if !output.status.success() {
return Err(anyhow!("Error in running cargo geiger"));
}

let geiger_report: GeigerReport = serde_json::from_slice(&output.stdout)?;
Ok(geiger_report)
}

fn get_unsafe_report(&self, name: String, version: String) -> Result<Option<UnsafeReport>> {
let key = (name, version);
let cache = self.geiger_cache.borrow();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe there should be a get_cargo_geiger_report that grabs one from the cache based on the key? That would hide the cache a bit from the other code.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably it would return Option<CargoGeigerReport>.


if !cache.contains_key(&key) {
// Cargo geiger can not have a result for a valid dependency
// e.g., openssl not present for geiger report for valid_dep test crate
return Ok(None);
}

let geiger_package_info = cache
.get(&key)
.ok_or_else(|| anyhow!("Missing package in Geiger cache"))?;

Ok(Some(UnsafeReport {
forbids_unsafe: geiger_package_info.unsafety.forbids_unsafe,
unsafe_count: UsedUnsafeDetails {
functions: geiger_package_info.unsafety.used.functions.unsafe_,
expressions: geiger_package_info.unsafety.used.exprs.unsafe_,
impls: geiger_package_info.unsafety.used.item_impls.unsafe_,
traits: geiger_package_info.unsafety.used.item_traits.unsafe_,
methods: geiger_package_info.unsafety.used.methods.unsafe_,
},
}))
}
}

#[cfg(test)]
Expand Down Expand Up @@ -212,4 +386,37 @@ mod test {
println!("{:?}", code_reports);
assert!(code_reports.len() > 0)
}

#[test]
fn test_code_cargo_geiger() {
let path = PathBuf::from("resources/test/valid_dep/Cargo.toml");
let geiger_report = CodeAnalyzer::get_cargo_geiger_report(&path).unwrap();
println!("{:?}", geiger_report);
assert!(geiger_report.packages.len() > 0);
}

#[test]
fn test_code_geiger_report_for_workspace() {
let code_analyzer = get_test_code_analyzer();
let graph = MetadataCommand::new()
.current_dir(PathBuf::from(".."))
.build_graph()
.unwrap();

// Get path to all packages in the workspace
let package_paths: Vec<&str> = graph
.workspace()
.iter()
.map(|pkg| pkg.manifest_path().as_str())
.collect();

code_analyzer
.get_cargo_geiger_report_for_workspace(package_paths)
.unwrap();
println!(
"Total keys in geiger cache: {}",
code_analyzer.geiger_cache.borrow().len()
);
assert!(code_analyzer.geiger_cache.borrow().len() > 0);
}
}