Created
March 23, 2021 08:17
-
-
Save ZaydH/7d4d9b281385f8bff6ba823fc65684a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import pickle | |
from pathlib import Path | |
import torch | |
import numpy as np | |
from tqdm import tqdm | |
DATA_DIR = Path("data") | |
def main():
    """Merge per-file pickled vectors into one array per class.

    Expects exactly two command-line arguments: the folder holding the
    malware vector files followed by the folder holding the benign vector
    files.  Every regular file in a folder is unpickled and the loaded
    tensors are concatenated with ``torch.cat``.  For each class
    ("malicious" and "benign") two files are written under ``DATA_DIR``:

    * ``<class>.npy`` -- the merged vectors, saved with ``np.save``.
    * ``files_<class>.txt`` -- the source file names, one per line, in the
      same order in which the vectors were concatenated.

    Exits with status 1 when the argument count is wrong or when a folder
    contains no files to merge.
    """
    if len(sys.argv) != 3:
        print("Command Args: %s <MalwareFolder> <BenignFolder>" % sys.argv[0])
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is missing under ``python -S`` or when frozen.
        sys.exit(1)

    # Pair each label with its folder explicitly.  Deriving the label by
    # comparing paths mislabels the second pass when both arguments are equal.
    labeled_dirs = (
        ("malicious", Path(sys.argv[1])),
        ("benign", Path(sys.argv[2])),
    )

    # np.save and open() do not create missing parent directories.
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    for export_name, vector_dir in labeled_dirs:
        # List the folder once: this gives tqdm an exact total without a
        # second directory walk, and sorting makes the row order reproducible
        # (iterdir() yields entries in arbitrary order).
        vec_files = sorted(p for p in vector_dir.iterdir() if p.is_file())
        if not vec_files:
            # torch.cat raises an opaque error on an empty list.
            print("No vector files found in %s" % vector_dir, file=sys.stderr)
            sys.exit(1)

        all_vec = []
        desc = export_name + " processing"
        for vec_file in tqdm(vec_files, ncols=80, total=len(vec_files),
                             file=sys.stdout, desc=desc):
            # NOTE(security): pickle.load can execute arbitrary code.  Only
            # run this on vector files produced by a trusted pipeline.
            with vec_file.open("rb") as vec_in:
                all_vec.append(pickle.load(vec_in))

        # Export the file information
        merged_vec = torch.cat(all_vec)
        # Convert explicitly so tensors that track gradients or live on
        # another device are still saved as a plain NumPy array.
        np.save(str(DATA_DIR / (export_name + ".npy")),
                merged_vec.detach().cpu().numpy())

        with open(DATA_DIR / ("files_" + export_name + ".txt"), "w") as f_out:
            f_out.write("\n".join(p.name for p in vec_files))
# Run the merge only when this file is executed as a script, so that
# importing the module does not trigger any file processing.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment