Skip to content

Instantly share code, notes, and snippets.

@huddlej
Created June 21, 2023 23:29
Show Gist options
  • Save huddlej/59d9ed902367427dc870bf27834c9b84 to your computer and use it in GitHub Desktop.
Save huddlej/59d9ed902367427dc870bf27834c9b84 to your computer and use it in GitHub Desktop.
Simplified script to find "polytomies" of a minimum size in a given Newick tree
#!/usr/bin/env python3
import argparse
import Bio.Phylo
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Find polytomies in a given tree",
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument("--tree", required=True, help="Newick tree")
parser.add_argument("--min-tips", type=int, default=3, help="minimum tips per polytomy to be consider as a cluster")
parser.add_argument("--max-branch-length", type=int, default=0, help="longest branch length per tip to be considered part of a polytomy")
parser.add_argument("--output", required=True, help="tab-delimited file with strain and polytomy id")
args = parser.parse_args()
# Read in the UShER tree in Newick format.
tree = Bio.Phylo.read(args.tree, "newick")
# Write a TSV of strain name and unique numeric polytomy id.
with open(args.output, "w", encoding="utf-8") as oh:
print("strain\tpolytomy_id", file=oh)
polytomy_id = 0
for node in tree.find_clades(terminal=False):
if node == tree.root:
continue
# Collect all tips directly descended from the current node on a
# branch no longer than the max branch length.
tips = []
for child in node.clades:
if child.is_terminal() and child.branch_length <= args.max_branch_length:
tips.append(child.name)
# Record the tips for this node's polytomy, if there are enough tips
# with the minimum branch length.
if len(tips) >= args.min_tips:
for tip in tips:
print(f"{tip}\t{polytomy_id}", file=oh)
polytomy_id += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment