Last active
September 10, 2016 12:58
-
-
Save JohnLonginotto/7909b2c01da0273bb1f78e706070b481 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Filter an index FASTQ file down to the reads present in a reads FASTQ file.

Usage: <script> READS_FASTQ INDEX_FASTQ

Both inputs are read as 4-line records whose first line is the read ID.
Every index record whose ID line also appears in the reads file is copied
(all four lines) to ``filtered_index.fastq`` in the current directory.
"""
import itertools
import sys

# Only ID lines starting with this prefix are collected from the reads file.
# NOTE(review): hard-coded in the original; presumably the sequencer's
# instrument prefix -- confirm before using on data from another machine.
READ_ID_PREFIX = "@HWI"
OUTPUT_PATH = "filtered_index.fastq"
# The original loop printed items 0..10 inclusive, i.e. up to 11 IDs.
PREVIEW_COUNT = 11
LINES_PER_RECORD = 4


def collect_read_ids(reads_path, prefix=READ_ID_PREFIX):
    """Return the set of ID lines (line terminator removed) in *reads_path*.

    Only the first line of every 4-line record is inspected; the other
    three lines are skipped without being stored.
    """
    read_ids = set()
    with open(reads_path, "r") as read_file:
        # Step through the file four lines at a time, looking only at the
        # first line of each record.
        for header in itertools.islice(read_file, 0, None, LINES_PER_RECORD):
            if header.startswith(prefix):
                # Strip the terminator so a missing final newline or a
                # different line-ending style cannot prevent a match later.
                read_ids.add(header.rstrip("\r\n"))
    return read_ids


def filter_index(index_path, read_ids, output_path=OUTPUT_PATH):
    """Copy index records whose ID line is in *read_ids* to *output_path*.

    Records are written unmodified. A truncated trailing record (fewer than
    four lines) is dropped. Returns the number of records written.
    """
    kept = 0
    with open(index_path, "r") as index_file, \
            open(output_path, "w") as filtered_file:
        while True:
            record = list(itertools.islice(index_file, LINES_PER_RECORD))
            if len(record) < LINES_PER_RECORD:
                break
            if record[0].rstrip("\r\n") in read_ids:
                filtered_file.writelines(record)
                kept += 1
    return kept


def main(argv=None):
    """Run the filter; *argv* defaults to ``sys.argv[1:]``.

    Returns 0 on success, or 2 when the two required paths are missing.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.stderr.write(
            "usage: %s READS_FASTQ INDEX_FASTQ\n" % sys.argv[0])
        return 2
    reads, index = args[0], args[1]

    all_read_ids = collect_read_ids(reads)

    # Show a small sample of the collected IDs as a sanity check.
    if not all_read_ids:
        print("List is Empty")
    else:
        for item in itertools.islice(all_read_ids, PREVIEW_COUNT):
            print(item)

    # Filtering still runs when no IDs were found, producing an empty file.
    filter_index(index, all_read_ids)
    print("All done!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment