sacdallago · July 12, 2020 15:46
diff --git a/Readme.md b/Readme.md
diff --git a/convert_txt_to_csv_with_representatives.py b/convert_txt_to_csv_with_representatives.py
 #!/bin/python

 import pandas as pd
 from pandas import read_csv

 # Load the tab-separated cluster table.
 clusters = read_csv('goa_swissprot_2020_100.fasta.clstr.txt', sep="\t")

 # Split rows by cluster size. Take explicit copies of both selections:
 # each of them gets a new column assigned later, and assigning into an
 # un-copied boolean selection triggers pandas' SettingWithCopyWarning.
 is_singleton = clusters['clstr_size'] == 1
 single_clusters = clusters[is_singleton].copy()
 multi_clusters = clusters[~is_singleton].copy()

 # Rows whose clstr_size is 1 use their own id as the representative id.
 single_clusters['representitive_id'] = single_clusters['id']


 def assign_cluster_representitive(cluster, target=None):
     """Write a cluster's representative id onto every member row.

     The representative is the first row of ``cluster`` whose ``clstr_rep``
     equals 1. Its ``id`` is stored in the ``representitive_id`` column of
     ``target`` for every index label present in ``cluster``.

     Args:
         cluster: DataFrame with the rows of one cluster; it must provide
             the ``clstr_rep`` and ``id`` columns.
         target: DataFrame updated in place. Defaults to the module-level
             ``multi_clusters`` frame, so existing one-argument calls keep
             working.

     Raises:
         IndexError: If no row in ``cluster`` has ``clstr_rep == 1``.
     """
     if target is None:
         target = multi_clusters

     representative_ids = cluster.loc[cluster['clstr_rep'] == 1, 'id']
     if representative_ids.empty:
         # Same exception type as the former ``.values[0]`` lookup, but with
         # a message that says what is actually wrong with the data.
         raise IndexError(
             'cluster of {} row(s) has no row with clstr_rep == 1'.format(
                 len(cluster)
             )
         )

     # The column name keeps its existing spelling: it is written out as a
     # header by the script's final to_csv call.
     target.loc[cluster.index, 'representitive_id'] = representative_ids.iloc[0]

 # Resolve the representative id for each multi-member cluster, one group
 # at a time; the helper writes the result into ``multi_clusters``.
 clusters_by_name = multi_clusters.groupby('clstr')
 for cluster_name, cluster in clusters_by_name:
     assign_cluster_representitive(cluster)

 # Stack the singleton rows first, then the multi-member rows.
 clusters = pd.concat(objs=[single_clusters, multi_clusters])

 # Write the combined table without the index column.
 output_path = 'goa_swissprot_2020_100_with_representatives.clstr.txt'
 clusters.to_csv(output_path, index=False)
	#!/bin/python

	import pandas as pd
	from pandas import read_csv

	# Load the tab-separated cluster table.
	clusters = read_csv('goa_swissprot_2020_100.fasta.clstr.txt', sep="\t")

	# Split rows by cluster size. Take explicit copies of both selections:
	# each of them gets a new column assigned later, and assigning into an
	# un-copied boolean selection triggers pandas' SettingWithCopyWarning.
	is_singleton = clusters['clstr_size'] == 1
	single_clusters = clusters[is_singleton].copy()
	multi_clusters = clusters[~is_singleton].copy()

	# Rows whose clstr_size is 1 use their own id as the representative id.
	single_clusters['representitive_id'] = single_clusters['id']


	def assign_cluster_representitive(cluster, target=None):
	    """Write a cluster's representative id onto every member row.

	    The representative is the first row of ``cluster`` whose ``clstr_rep``
	    equals 1. Its ``id`` is stored in the ``representitive_id`` column of
	    ``target`` for every index label present in ``cluster``.

	    Args:
	        cluster: DataFrame with the rows of one cluster; it must provide
	            the ``clstr_rep`` and ``id`` columns.
	        target: DataFrame updated in place. Defaults to the module-level
	            ``multi_clusters`` frame, so existing one-argument calls keep
	            working.

	    Raises:
	        IndexError: If no row in ``cluster`` has ``clstr_rep == 1``.
	    """
	    if target is None:
	        target = multi_clusters

	    representative_ids = cluster.loc[cluster['clstr_rep'] == 1, 'id']
	    if representative_ids.empty:
	        # Same exception type as the former ``.values[0]`` lookup, but with
	        # a message that says what is actually wrong with the data.
	        raise IndexError(
	            'cluster of {} row(s) has no row with clstr_rep == 1'.format(
	                len(cluster)
	            )
	        )

	    # The column name keeps its existing spelling: it is written out as a
	    # header by the script's final to_csv call.
	    target.loc[cluster.index, 'representitive_id'] = representative_ids.iloc[0]

	# Resolve the representative id for each multi-member cluster, one group
	# at a time; the helper writes the result into ``multi_clusters``.
	for cluster_name, cluster in multi_clusters.groupby('clstr'):
	    # The loop body must be indented under the ``for`` header.
	    assign_cluster_representitive(cluster)

	# Stack the singleton rows first, then the multi-member rows.
	clusters = pd.concat([single_clusters, multi_clusters])

	# Write the combined table without the index column.
	clusters.to_csv('goa_swissprot_2020_100_with_representatives.clstr.txt', index=False)