Skip to content

Instantly share code, notes, and snippets.

@marcelm
Created March 7, 2023 09:42
Show Gist options
  • Save marcelm/20d847badeadf6d9c7b8b11e6115d892 to your computer and use it in GitHub Desktop.
Save marcelm/20d847badeadf6d9c7b8b11e6115d892 to your computer and use it in GitHub Desktop.
Quality trimming experiment
#!/usr/bin/env python3
"""Quality trimming using a running sum from the 5' to 3' end"""
import sys
from argparse import ArgumentParser
import dnaio
def qual_trim_index(qualities_ascii, threshold):
qualities = [ord(c) - 33 for c in qualities_ascii]
score = 0
best_end = 0
best_score = 0
for i, q in enumerate(qualities):
score += q - threshold
if score > best_score:
best_end = i + 1
best_score = score
return best_end
def main():
parser = ArgumentParser()
parser.add_argument("-q", dest="threshold", metavar="THRESHOLD", type=int, default=30, help="Quality trimming threshold")
parser.add_argument("fastq")
args = parser.parse_args()
threshold = args.threshold
with dnaio.open(args.fastq) as infile:
with dnaio.open(sys.stdout.buffer, mode="w", qualities=True) as outfile:
for record in infile:
end = qual_trim_index(record.qualities, threshold)
record = record[:end]
outfile.write(record)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment