Created
August 12, 2019 11:33
-
-
Save ZaydH/1c70a1b254d5326250cd561e59e10c9b to your computer and use it in GitHub Desktop.
Script for Merging all SLEIPNIR Feature Vectors
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle
import sys
from pathlib import Path

import numpy as np
import torch
from tqdm import tqdm
# Directory that receives the merged ``.npy`` arrays and the
# ``files_<class>.txt`` name listings written by ``main``.
DATA_DIR = Path("data")
def main():
    """Merge per-file pickled feature vectors into one array per class.

    Expects exactly two command-line arguments: the folder holding the
    malware vectors and the folder holding the benign vectors.  For each
    folder, every regular file is unpickled, the loaded tensors are
    concatenated with ``torch.cat``, and two artifacts are written under
    ``DATA_DIR``:

    * ``<class>.npy`` -- the merged vectors
    * ``files_<class>.txt`` -- the source file names, one per line, in the
      same order in which the vectors were concatenated

    Exits with status 1 when the argument count is wrong or when a folder
    contains no vector files.
    """
    if len(sys.argv) != 3:
        print("Command Args: %s <MalwareFolder> <BenignFolder>" % sys.argv[0])
        # sys.exit, not the ``exit`` helper: the latter is injected by the
        # ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
        sys.exit(1)

    # The label is tied to the argument position rather than derived by
    # comparing paths, so passing the same folder twice still yields both a
    # "malicious" and a "benign" export.
    labeled_dirs = (
        ("malicious", Path(sys.argv[1])),
        ("benign", Path(sys.argv[2])),
    )

    DATA_DIR.mkdir(parents=True, exist_ok=True)

    for export_name, vector_dir in labeled_dirs:
        # List the folder once: the progress-bar total then always matches
        # what is read, sub-directories are skipped instead of crashing
        # open(), and sorting makes the output order reproducible.
        vec_files = sorted(p for p in vector_dir.iterdir() if p.is_file())
        if not vec_files:
            sys.exit("No vector files found in %s" % vector_dir)

        all_vec, file_names = [], []
        desc = export_name + " processing"
        for vec_file in tqdm(vec_files, ncols=80, total=len(vec_files),
                             file=sys.stdout, desc=desc):
            file_names.append(vec_file.name)
            # NOTE(security): pickle.load can execute arbitrary code; only
            # run this on vector files produced by a trusted pipeline.
            with open(vec_file, "rb") as vec_in:
                all_vec.append(pickle.load(vec_in))

        # Export the file information
        merged_vec = torch.cat(all_vec)
        # Convert explicitly so tensors that live on a GPU or track
        # gradients can still be saved.
        np.save(str(DATA_DIR / (export_name + ".npy")),
                merged_vec.detach().cpu().numpy())
        with open(DATA_DIR / ("files_" + export_name + ".txt"), "w") as f_out:
            f_out.write("\n".join(file_names))
# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment