Created
June 26, 2018 03:36
-
-
Save Eh2406/91dc9bed999bf3d1ec64cfb6f98d62c9 to your computer and use it in GitHub Desktop.
fuzz cargo generate-lockfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

import csv
import json
import os
import subprocess
import sys
import time
from threading import Timer
from timeit import default_timer

import pandas as pd
def run(folder, deps, timeout_sec=1.0):
    """Generate a lockfile for a throwaway crate that depends on ``deps``.

    Writes an empty ``main.rs`` and a minimal ``Cargo.toml`` listing ``deps``
    into ``folder`` (created if it does not exist yet), then runs
    ``generate-lockfile -Zno-index-update`` there using the module-level
    ``cargo_path`` command prefix, with the child's output discarded.  The
    child is killed if it is still running after ``timeout_sec`` seconds.

    Args:
        folder: Directory used as the scratch crate.
        deps: Mapping of dependency name to version requirement string.
        timeout_sec: Seconds to wait before killing the child process.

    Returns:
        A tuple ``(exit_code, elapsed_seconds, trans_deps)``.  ``trans_deps``
        is the sorted list of the second whitespace-separated field of every
        row longer than five characters that follows the ``[metadata]``
        marker in ``Cargo.lock``.  It is empty when the exit code is not 0 or
        the lockfile has no ``[metadata]`` marker.
    """
    if not os.path.isdir(folder):
        os.makedirs(folder)

    # The library target only has to exist: truncate it and release the
    # handle immediately instead of leaking it.
    with open(os.path.join(folder, 'main.rs'), 'w'):
        pass

    with open(os.path.join(folder, 'Cargo.toml'), 'w') as manifest:
        manifest.write(
            '\n'
            '[package]\n'
            'name = "cargo_speed_test"\n'
            'version = "0.1.0"\n'
            '[lib]\n'
            'path = "main.rs"\n'
            '[dependencies]\n'
        )
        for name, ver in deps.items():
            manifest.write('{} = "{}"\n'.format(name, ver))

    # Wall-clock timer: the interesting duration is how long the child ran,
    # not how much CPU time this (mostly idle) parent process consumed.
    start = default_timer()
    with open(os.devnull, "w") as devnull:
        proc = subprocess.Popen(cargo_path + ["generate-lockfile", "-Zno-index-update"],
                                cwd=folder, stdout=devnull, stderr=devnull)
        timer = Timer(timeout_sec, proc.kill)
        try:
            timer.start()
            out_code = proc.wait()
        finally:
            # Always disarm the watchdog so it cannot fire after we return.
            timer.cancel()
    out_time = default_timer() - start

    out_trans_deps = []
    if out_code == 0:
        with open(os.path.join(folder, 'Cargo.lock'), 'r') as lock_file:
            sections = lock_file.read().split("[metadata]")
        # A lockfile without a [metadata] marker yields a single chunk; report
        # no transitive deps rather than raising IndexError.
        if len(sections) > 1:
            out_trans_deps = sorted(
                row.split()[1] for row in sections[1].splitlines() if len(row) > 5
            )
    return out_code, out_time, out_trans_deps
def read_index(folder):
    """Yield ``(file_name, version, dep_count)`` for each non-yanked entry.

    Walks ``folder`` recursively, never descending into directories whose
    name starts with a dot and ignoring files named ``config.json``.  Every
    other file is read as one JSON object per line; blank lines are skipped.

    Args:
        folder: Root directory of the index checkout to scan.

    Yields:
        Tuples of the file's name, the entry's ``vers`` value and the length
        of its ``deps`` list, for entries whose ``yanked`` value is falsy.
    """
    for root, dirnames, filenames in os.walk(folder):
        # Prune in place so os.walk does not descend into hidden directories.
        dirnames[:] = [d for d in dirnames if not d.startswith(".")]
        for name in filenames:
            if name == 'config.json':
                continue
            with open(os.path.join(root, name)) as f:
                # Stream the file line by line instead of loading it whole.
                for line in f:
                    if not line.strip():
                        # An empty line would make json.loads raise.
                        continue
                    entry = json.loads(line)
                    if entry['yanked']:
                        continue
                    yield name, entry['vers'], len(entry['deps'])
def logged_run(deps, log_writer, timeout_sec=1.0):
    """Call ``run`` on ``deps`` in the ``temp`` folder and record the outcome.

    One row is written to ``log_writer``: exit code, elapsed time, ``deps``
    serialised as JSON, and the transitive deps joined with commas.  The exit
    code and elapsed time are also printed.

    Returns:
        The ``(exit_code, elapsed, trans_deps)`` tuple produced by ``run``.
    """
    result = run("temp", deps, timeout_sec)
    code, elapsed, trans_deps = result
    log_writer.writerow([code, elapsed, json.dumps(deps), ','.join(trans_deps)])
    print(code, elapsed)
    return result
def save_index():
    """Dump the index checkout found at ``index_path`` into ``index.csv``.

    Every row produced by ``read_index`` is collected, sorted, and written
    with the columns ``crate``, ``ver`` and ``num_debs``.
    """
    rows = list(read_index(index_path))
    rows.sort()
    frame = pd.DataFrame(rows, columns=['crate', 'ver', 'num_debs'])
    frame.to_csv("index.csv")
def save_straight_pass():
    """Check that each indexed version resolves on its own.

    Reads ``index.csv`` and, for every row with more than one dependency,
    runs a single-dependency crate through ``logged_run``, appending the
    results to ``straight_pass.csv``.
    """
    index = pd.read_csv("index.csv", index_col=0)
    # if num_debs == 0 then the lock file is empty
    # if num_debs == 1 then the lock file is redundant with that dep
    index = index[index.num_debs > 1]

    log_path = "straight_pass.csv"
    # The log is opened in append mode, so the header must only be written
    # when the file is new or empty; otherwise every rerun would embed another
    # header row in the data that the second pass later tries to parse.
    needs_header = not os.path.exists(log_path) or os.path.getsize(log_path) == 0

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] < 3:
        log_file = open(log_path, "ab")
    else:
        log_file = open(log_path, "a", newline="")

    with log_file:
        log_writer = csv.writer(log_file)
        if needs_header:
            log_writer.writerow(["code", "time", "dumps", "trans_deps"])
        for _, row in index.iterrows():
            logged_run({row.crate: row.ver}, log_writer)
def save_second_pass():
    """Read the things that build on their own and check combinations.

    This is to find cases like:
    - https://github.com/rust-lang/cargo/issues/4810#issuecomment-357553286
    where having two pinned deps causes cargo to hang.

    First I was going to try all pairs of deps, but .1 sec * 0.5 (6k ^ 2) is a long time.
    Then I was going to do all pairs with overlap in lock files (hence the `trans_deps` column)
    but even that would take years.
    Then I realized that as long as the problematic deps are in the list then:
    1. something else will unhelpfully cause it to fail fast.
    2. it will hang.
    so the plan is to add random deps one at a time.
    if it fails fast:
    -> then it will be hard to learn anything from extensions so back out the one just added.
    if it passes fast:
    -> add the next one. Hopefully it will have a bad reaction to anyone already in the list.
    Thanks to the birthday paradox this will test a lot of combinations fast.
    if it takes a long time:
    -> it is probably just something random, like antivirus trying to understand what is going on
    So retry with a longer time out. (in practice I never hit this.)

    Input is ``straight_pass.csv``; every attempt is logged to
    ``second_pass.csv`` (overwritten on each call).  With at least one
    candidate row the function loops until interrupted; with none it returns
    after writing the header.
    """
    straight_pass = pd.read_csv("straight_pass.csv",
                                converters={
                                    'dumps': json.loads,
                                    'trans_deps': lambda x: set(x.split(","))
                                })
    # Each ``dumps`` cell is a one-entry {crate: version} mapping; pull the
    # single key/value out without relying on list-returning dict methods.
    straight_pass['crate'] = straight_pass.dumps.apply(lambda x: next(iter(x)))
    straight_pass['ver'] = straight_pass.dumps.apply(lambda x: next(iter(x.values())))
    del straight_pass['dumps']
    straight_pass = straight_pass.sort_values(["crate", "ver"])
    straight_pass = straight_pass[straight_pass.code == 0]
    # NOTE: this is a plain string comparison, not a semver-aware one.
    straight_pass = straight_pass[straight_pass.ver > "0.0.9"]
    print (len(straight_pass))

    # Sampling more rows than exist raises, so cap the batch at what we have.
    batch_size = min(1000, len(straight_pass))

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] < 3:
        log_file = open("second_pass.csv", "wb")
    else:
        log_file = open("second_pass.csv", "w", newline="")

    with log_file:
        log_writer = csv.writer(log_file)
        log_writer.writerow(["code", "time", "dumps", "trans_deps"])
        if batch_size == 0:
            # Nothing to combine; looping would just spin forever.
            return
        while True:
            pinned = {}
            for _, row1 in straight_pass.sample(batch_size).iterrows():
                print (row1.crate.rjust(20, ' '), len(pinned), "\t", end='')
                if row1.crate in pinned:
                    print ("already in")
                    continue
                pinned[row1.crate] = "=" + row1.ver
                out = logged_run(pinned, log_writer, 30.0)
                for retry_timeout in (60.0, 120.0):
                    # Exit code 1 is the original "killed by the timeout"
                    # signal (presumably what Popen.kill yields on Windows --
                    # confirm); a negative code means the child was killed by
                    # a signal on POSIX.
                    if not (out[0] == 1 or out[0] < 0):
                        break
                    print ("time out!!")
                    out = logged_run(pinned, log_writer, retry_timeout)
                if out[0] != 0:
                    # Failed or still hanging: back out the crate just added.
                    del pinned[row1.crate]
# Path of the index checkout that save_index() walks.
index_path = r"../crates.io-index"
# Command prefix that run() extends with the cargo sub-command and flags.
cargo_path = ["cargo", "+nightly"]

# Run the pipeline in order: index -> index.csv, the per-version pass ->
# straight_pass.csv, then the combination pass -> second_pass.csv.
for stage in (save_index, save_straight_pass, save_second_pass):
    stage()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment