Skip to content

Instantly share code, notes, and snippets.

@kurtlawrence
Created September 22, 2024 03:38
Show Gist options
  • Save kurtlawrence/23f1194e60f3105c489547074400b9fd to your computer and use it in GitHub Desktop.
Save kurtlawrence/23f1194e60f3105c489547074400b9fd to your computer and use it in GitHub Desktop.
Rust script to investigate duplicate dependencies that could be remedied
#!/usr/bin/env -S rust-script -c
//! You might need to chmod +x your script!
//! ```cargo
//! [dependencies.rust-script-ext]
//! git = "https://github.com/kurtlawrence/rust-script-ext"
//! rev = "e914bc0a04e9216ffdfd15c47f4c39b74d98bbeb"
//!
//! [dependencies]
//! colored = "2.1.0"
//! ```
use colored::*;
use rust_script_ext::prelude::*;
use std::{
collections::{BTreeMap, HashMap},
rc::Rc,
str::FromStr,
};
fn main() -> Result<()> {
let debug = args().has(|x| x == "--debug");
let mut duplicates =
collect_duplicates()?
.into_iter()
.fold(BTreeMap::<_, Vec<_>>::default(), |mut map, dep| {
map.entry(dep.pkg.clone()).or_default().push(dep);
map
});
duplicates.values_mut().for_each(|x| {
x.sort_unstable();
x.dedup();
});
duplicates.retain(|_, ds| ds.len() > 1);
if debug {
duplicates.iter().for_each(|(p, ds)| {
println!("{p}");
ds.iter().for_each(|d| println!(" {}", d.ver));
});
}
println!(
"{}",
format!(
"Found {} packages with duplicates",
duplicates.len().to_string().cyan()
)
.bold()
);
let pkgs = collect_packages()?;
for (pkg, deps) in duplicates {
let mut deps = deps
.into_iter()
.map(|dep| {
let x = find_minimum_effort(pkgs.get(&dep).expect("pkg to exist"));
(dep, x)
})
.collect::<Vec<_>>();
deps.sort_unstable_by_key(|x| x.1.heuristic);
println!();
print_pkg_summary(&pkg, &deps, debug);
}
Ok(())
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
struct Dep {
pkg: String,
ver: String,
own: Ownership,
}
type RcDep = Rc<Dep>;
impl FromStr for Dep {
type Err = deps::miette::Report;
fn from_str(s: &str) -> Result<Self> {
let mut s = s.splitn(3, ' ');
let pkg = s
.next()
.map(String::from)
.ok_or_else(|| miette!("expecting package name"))?;
let ver = s
.next()
.map(String::from)
.ok_or_else(|| miette!("expecting package version"))?;
let own = s.next().map_or(Ok(Ownership::None), Ownership::from_str)?;
Ok(Self { pkg, ver, own })
}
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
enum Ownership {
None,
Path(String),
Registry(String),
}
impl Ownership {
fn is_owned(&self) -> bool {
!matches!(self, Ownership::None)
}
}
impl FromStr for Ownership {
type Err = deps::miette::Report;
fn from_str(input: &str) -> Result<Self> {
let s = input.trim_end_matches(" (*)"); // dupe identifier?
if let Some(s) = s.strip_prefix('(') {
if s.starts_with("proc-macro") {
Ok(Ownership::None)
} else if let Some(s) = s.strip_prefix("registry `") {
Ok(Ownership::Registry(
s.strip_suffix("`)")
.ok_or_else(|| miette!("invalid registry format: {input}"))?
.to_owned(),
))
} else {
// we assume it is a path dependency
Ok(Ownership::Path(
s.strip_suffix(")")
.ok_or_else(|| miette!("invalid path dep format"))?
.to_owned(),
))
}
} else {
Err(miette!("expecting brackets"))
}
}
}
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
struct Heuristic {
order: u8,
depth: u32,
}
impl Ord for Heuristic {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
let Self { order, depth } = other;
self.order.cmp(order).then_with(|| self.depth.cmp(depth))
}
}
impl PartialOrd for Heuristic {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
#[derive(Debug)]
struct Foo {
heuristic: Heuristic,
owner: RcDep,
children: Vec<RcDep>,
}
fn collect_duplicates() -> Result<Vec<Dep>> {
String::from_utf8(
cmd!(cargo: tree, -d, --depth, 0)
.output()
.into_diagnostic()?
.stdout,
)
.into_diagnostic()
.map(|output| {
output
.lines()
.map(|line| line.trim())
.filter(|&line| !line.is_empty() && line != "[build-dependencies]")
.filter_map(|line| {
Dep::from_str(line)
.inspect_err(|e| eprintln!("could not parse dependency line `{line}`: {e}"))
.ok()
})
.collect()
})
}
type Ancestors = Vec<RcDep>;
fn collect_packages() -> Result<HashMap<RcDep, Vec<Ancestors>>> {
let cargo_output = String::from_utf8(
cmd!(cargo: tree, --prefix=depth)
.output()
.into_diagnostic()?
.stdout,
)
.into_diagnostic()?;
let mut map = HashMap::<_, Vec<_>>::default();
let mut ancestors = Vec::new();
for line in cargo_output
.lines()
.map(|x| x.trim())
.filter(|&line| !line.is_empty() && line != "[build-dependencies]")
{
let (depth, s) = line
.char_indices()
.find_map(|(i, c)| (!c.is_ascii_digit()).then_some(i))
.map(|idx| line.split_at(idx))
.ok_or_else(|| miette!("expecting pkg to begin with depth"))
.wrap_err_with(|| format!("invalid output line: {line}"))?;
let depth = depth.parse::<usize>().expect("ASCII digits within usize");
let dep = Rc::new(Dep::from_str(s)?);
if depth < ancestors.len() {
ancestors.truncate(depth);
}
ancestors.push(Rc::clone(&dep));
map.entry(dep).or_default().push(ancestors.clone());
}
Ok(map)
}
fn find_minimum_effort(ancestors: &[Ancestors]) -> Foo {
ancestors
.iter()
.filter_map(|xs| {
let depth = u32::try_from(xs.len())
.expect("that's alotta deps!")
.saturating_sub(1);
// find the _first_ dep which has ownership
// we expect to ALWAYS have a crate which has ownership!
let owned_depth = xs.iter().rev().position(|x| x.own.is_owned())?;
let (owner, children) = xs[xs.len().saturating_sub(owned_depth + 1)..]
.split_first()
.expect("non-empty");
// a neat happenstance is that the order is the same as the owner depth
let order = u8::try_from(owned_depth.min(255)).expect("inside u8");
Some(Foo {
owner: owner.clone(),
children: children.to_vec(),
heuristic: Heuristic { order, depth },
})
})
.min_by_key(|x| x.heuristic)
.expect("at least one dep")
}
fn print_pkg_summary(pkg: &str, deps: &[(Dep, Foo)], debug: bool) {
println!("Summary for package: {}", pkg.cyan().bold());
let vers = deps
.iter()
.map(|x| x.0.ver.as_str())
.collect::<Vec<_>>()
.join(" ")
.italic();
println!(" there are {} versions: {vers}", deps.len());
let filtered = !debug && deps.iter().all(|d| d.1.heuristic.order >= 3);
if filtered {
println!(" {}", "all packages are of futile order".italic());
}
for (
dep,
Foo {
heuristic,
owner,
children,
},
) in deps.iter().filter(|_| !filtered)
{
let filtered = !debug && heuristic.order >= 3;
let heuristic = format!("{}:{}", heuristic.order, heuristic.depth);
println!(
"({}) {} {}:{}",
heuristic.bold().purple(),
dep.pkg.italic(),
dep.ver.italic().bold(),
if filtered {
" filtering futile order"
} else {
""
}
);
if filtered {
continue;
}
println!(
" {} {} {}",
owner.pkg,
owner.ver,
match &owner.own {
Ownership::None => "",
Ownership::Path(p) => p.as_str(),
Ownership::Registry(r) => r.as_str(),
}
);
let path = children
.iter()
.map(|dep| format!("{} ({})", dep.pkg, dep.ver))
.collect::<Vec<_>>()
.join(" ➡️ ");
if !path.is_empty() {
println!(" ➡️ {path}");
}
}
println!();
println!("---");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment