Skip to content

Instantly share code, notes, and snippets.

@bede
Last active August 8, 2024 10:51
Show Gist options
  • Save bede/89c90755dd29490f33047e52f97078ad to your computer and use it in GitHub Desktop.
Save bede/89c90755dd29490f33047e52f97078ad to your computer and use it in GitHub Desktop.
Concatenate demultiplexed ONT FASTQs by barcode (for one or more runs)
"""
Purpose: Concatenate demultiplexed FASTQs by barcode for one or more ONT runs
Usage: python concat_by_barcode.py run1/fastq_pass run2/fastq_pass -o output/
Author: Bede Constantinides
"""
import argparse
import shutil
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
def concatenate_fastqs_by_barcode(fastq_pass_dirs: list[Path], output_dir: Path) -> None:
    """Concatenate demultiplexed FASTQ files per barcode across run directories.

    For each barcode name ``barcode01`` .. ``barcode96``, every regular file
    ending in ``.fastq.gz`` found in ``<dir>/<barcode>/`` (for each directory
    in ``fastq_pass_dirs``, in the order given) is appended byte-for-byte to
    ``<output_dir>/<barcode>.fastq.gz``. Barcodes with no input files produce
    no output file. A progress line is printed to stderr for each file written.

    Files are copied in-process rather than through a shell ``cat`` command,
    so paths containing spaces or shell metacharacters are handled safely,
    I/O failures raise instead of being silently ignored, and the number of
    input files is not bounded by the OS argument-length limit.

    Args:
        fastq_pass_dirs: Directories containing ``barcodeNN`` subdirectories.
        output_dir: Destination directory; created (with parents) if missing.
            An existing ``<barcode>.fastq.gz`` inside it is replaced.

    Raises:
        OSError: If an input cannot be read or an output cannot be written.
    """
    fq_ext = ".fastq.gz"
    barcodes = [f"barcode{i:02d}" for i in range(1, 97)]
    output_dir = Path(output_dir)

    # Collect input files per barcode, preserving the order of the run dirs.
    barcodes_paths = defaultdict(list)
    for barcode in barcodes:
        for run_dir in fastq_pass_dirs:
            barcode_dir = Path(run_dir) / barcode
            if not barcode_dir.is_dir():
                continue
            # iterdir() yields entries in arbitrary order; sort within each
            # directory so the concatenated output is reproducible.
            barcodes_paths[barcode].extend(
                sorted(
                    str(entry)
                    for entry in barcode_dir.iterdir()
                    if entry.name.endswith(fq_ext) and entry.is_file()
                )
            )

    output_dir.mkdir(parents=True, exist_ok=True)
    for barcode, found_paths in barcodes_paths.items():
        output_path = output_dir / f"{barcode}{fq_ext}"
        # Never read from the file being written (possible if output_dir is
        # itself a barcode directory holding a previous result): that would
        # either fail after the unlink below or copy the file into itself.
        resolved_output = output_path.resolve()
        fastq_paths = [p for p in found_paths if Path(p).resolve() != resolved_output]
        if not fastq_paths:
            continue
        # Remove any previous result first so a stale file (or symlink) is
        # replaced rather than written through.
        output_path.unlink(missing_ok=True)
        with open(output_path, "wb") as dest:
            for fastq_path in fastq_paths:
                with open(fastq_path, "rb") as src:
                    shutil.copyfileobj(src, dest)
        print(f"Created {output_path} from files {fastq_paths}", file=sys.stderr)
if __name__ == "__main__":
    # Command-line entry point: one or more run directories as positionals,
    # plus an optional output directory (defaults to the current directory).
    cli = argparse.ArgumentParser(
        description="Concatenate demultiplexed FASTQs by barcode for one or more ONT runs."
    )
    cli.add_argument(
        "fastq_pass_dirs",
        type=Path,
        nargs="+",
        help="Directories containing FASTQ files.",
    )
    cli.add_argument(
        "-o",
        "--output-dir",
        default=Path("."),
        type=Path,
        help="Output directory for concatenated FASTQ files.",
    )
    options = cli.parse_args()
    concatenate_fastqs_by_barcode(options.fastq_pass_dirs, options.output_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment