mconcas · January 18, 2018 09:16
diff --git a/clusterer.py b/clusterer.py
 #! /usr/bin/env python

 import numpy as np
 import sys
 import pprint
 from sklearn.cluster import dbscan

 pp = pprint.PrettyPrinter(indent=2)

 # Position of the MC label inside a pixel row: [x, y, pID, MCl]
 MC_LABEL = 3


 def group_pixels(pixels, cl_labels):
     """Group the pixel rows by the cluster label assigned to each of them.

     pixels and cl_labels are walked in lockstep (one label per pixel).
     Returns a dictionary { ClID: [pixel, ...] } where every pixel is a plain
     list and pixels keep the order they have in the input.
     """
     clusters = {}
     for pixel, label in zip(pixels, cl_labels):
         clusters.setdefault(label, []).append(list(pixel))
     return clusters


 def is_homogeneous(cluster):
     """Return True when all the pixels of the cluster carry the same MC label."""
     # Notable SO goodie:
     # https://stackoverflow.com/questions/3844801/check-if-all-elements-in-a-list-are-identical
     mc_labels = [pixel[MC_LABEL] for pixel in cluster]
     return mc_labels[1:] == mc_labels[:-1]


 def classify_clusters(clusters):
     """Split the clusters into four dictionaries keyed by cluster label.

     Returns the tuple (homogeneous, bad, fragmented, good) where:
       homogeneous: clusters whose pixels all share one MC label
       bad:         clusters mixing different MC labels
       fragmented:  homogeneous clusters whose MC label is also the MC label
                    of at least one other homogeneous cluster
       good:        homogeneous clusters that are not fragmented
     """
     hom_clusters = {}
     bad_clusters = {}
     for label, cluster in clusters.items():
         if is_homogeneous(cluster):
             hom_clusters[label] = cluster
         else:
             bad_clusters[label] = cluster

     # Count homogeneous clusters per MC label once, instead of comparing
     # every pair of clusters.
     occurrences = {}
     for cluster in hom_clusters.values():
         mc_label = cluster[0][MC_LABEL]
         occurrences[mc_label] = occurrences.get(mc_label, 0) + 1

     frg_clusters = {k: v for k, v in hom_clusters.items()
                     if occurrences[v[0][MC_LABEL]] > 1}
     good_clusters = {k: v for k, v in hom_clusters.items()
                      if k not in frg_clusters}
     return hom_clusters, bad_clusters, frg_clusters, good_clusters


 def format_row(epsilon, min_samples, counts):
     """Format one output row: epsilon, min_samples and the five cluster counts.

     The row is newline-terminated so that rows appended by successive runs
     do not end up glued together on a single line.
     """
     return "%.4f %d %d %d %d %d %d\n" % ((epsilon, min_samples) + tuple(counts))


 def main(pars):
     """Cluster the pixels stored in a text file and summarise the result.

     pars is the command line: [script, filename, epsilon, min_samples, output].
     The summary is printed and one row is appended to the output file.
     Exits with status -1 when the number of arguments is wrong.
     """
     if len(pars) != 5:
         print("\tUsage: ./clusterer.py [filename] [epsilon] [min_samples] [output]")
         sys.exit(-1)

     # Parse the numeric arguments once, before any heavy work is done
     epsilon = float(pars[2])
     min_samples = int(pars[3])

     # List of pixels:
     #   pixel: [x, y, pID, MCl]
     # ndmin=2 keeps a one-line file as a table with a single row
     pixels = np.loadtxt(pars[1], ndmin=2)
     # NOTE(review): all four columns (pID and MCl included) are passed to
     # dbscan as coordinates -- confirm this is intended and not only x, y.
     core_samples, cl_labels = dbscan(pixels, eps=epsilon, min_samples=min_samples,
                                      metric='euclidean')

     # List of clusters:
     #  cluster: { ClID: [pixel, ...] }
     # NOTE(review): presumably dbscan marks noise with its own label; such
     # pixels are grouped and counted here like any other cluster -- confirm.
     clusters = group_pixels(pixels, cl_labels)

     # Skim data: homogeneous / bad / fragmented / good
     hom_clusters, bad_clusters, frg_clusters, good_clusters = classify_clusters(clusters)

     # pp.pprint(hom_clusters)
     # pp.pprint(bad_clusters)
     # pp.pprint(frg_clusters)
     # pp.pprint(good_clusters)

     counts = (len(clusters), len(hom_clusters), len(bad_clusters),
               len(frg_clusters), len(good_clusters))
     print("Clusters Found:\n\tTotal:       %d\n\tHomogeneous: %d\n\tBad:         %d\n\tFragmented:  %d\n\tGood:        %d" % counts)

     with open(pars[4], "a") as of:
         of.write(format_row(epsilon, min_samples, counts))


 if __name__ == "__main__":
     main(sys.argv)
	#! /usr/bin/env python

	import numpy as np
	import sys
	import pprint
	from sklearn.cluster import dbscan

	pp = pprint.PrettyPrinter(indent=2)

	# Position of the MC label inside a pixel row: [x, y, pID, MCl]
	MC_LABEL = 3


	def group_pixels(pixels, cl_labels):
	    """Group the pixel rows by the cluster label assigned to each of them.

	    pixels and cl_labels are walked in lockstep (one label per pixel).
	    Returns a dictionary { ClID: [pixel, ...] } where every pixel is a plain
	    list and pixels keep the order they have in the input.
	    """
	    clusters = {}
	    for pixel, label in zip(pixels, cl_labels):
	        clusters.setdefault(label, []).append(list(pixel))
	    return clusters


	def is_homogeneous(cluster):
	    """Return True when all the pixels of the cluster carry the same MC label."""
	    # Notable SO goodie:
	    # https://stackoverflow.com/questions/3844801/check-if-all-elements-in-a-list-are-identical
	    mc_labels = [pixel[MC_LABEL] for pixel in cluster]
	    return mc_labels[1:] == mc_labels[:-1]


	def classify_clusters(clusters):
	    """Split the clusters into four dictionaries keyed by cluster label.

	    Returns the tuple (homogeneous, bad, fragmented, good) where:
	      homogeneous: clusters whose pixels all share one MC label
	      bad:         clusters mixing different MC labels
	      fragmented:  homogeneous clusters whose MC label is also the MC label
	                   of at least one other homogeneous cluster
	      good:        homogeneous clusters that are not fragmented
	    """
	    hom_clusters = {}
	    bad_clusters = {}
	    for label, cluster in clusters.items():
	        if is_homogeneous(cluster):
	            hom_clusters[label] = cluster
	        else:
	            bad_clusters[label] = cluster

	    # Count homogeneous clusters per MC label once, instead of comparing
	    # every pair of clusters.
	    occurrences = {}
	    for cluster in hom_clusters.values():
	        mc_label = cluster[0][MC_LABEL]
	        occurrences[mc_label] = occurrences.get(mc_label, 0) + 1

	    frg_clusters = {k: v for k, v in hom_clusters.items()
	                    if occurrences[v[0][MC_LABEL]] > 1}
	    good_clusters = {k: v for k, v in hom_clusters.items()
	                     if k not in frg_clusters}
	    return hom_clusters, bad_clusters, frg_clusters, good_clusters


	def format_row(epsilon, min_samples, counts):
	    """Format one output row: epsilon, min_samples and the five cluster counts.

	    The row is newline-terminated so that rows appended by successive runs
	    do not end up glued together on a single line.
	    """
	    return "%.4f %d %d %d %d %d %d\n" % ((epsilon, min_samples) + tuple(counts))


	def main(pars):
	    """Cluster the pixels stored in a text file and summarise the result.

	    pars is the command line: [script, filename, epsilon, min_samples, output].
	    The summary is printed and one row is appended to the output file.
	    Exits with status -1 when the number of arguments is wrong.
	    """
	    if len(pars) != 5:
	        print("\tUsage: ./clusterer.py [filename] [epsilon] [min_samples] [output]")
	        sys.exit(-1)

	    # Parse the numeric arguments once, before any heavy work is done
	    epsilon = float(pars[2])
	    min_samples = int(pars[3])

	    # List of pixels:
	    # pixel: [x, y, pID, MCl]
	    # ndmin=2 keeps a one-line file as a table with a single row
	    pixels = np.loadtxt(pars[1], ndmin=2)
	    # NOTE(review): all four columns (pID and MCl included) are passed to
	    # dbscan as coordinates -- confirm this is intended and not only x, y.
	    core_samples, cl_labels = dbscan(pixels, eps=epsilon, min_samples=min_samples,
	                                     metric='euclidean')

	    # List of clusters:
	    # cluster: { ClID: [pixel, ...] }
	    # NOTE(review): presumably dbscan marks noise with its own label; such
	    # pixels are grouped and counted here like any other cluster -- confirm.
	    clusters = group_pixels(pixels, cl_labels)

	    # Skim data: homogeneous / bad / fragmented / good
	    hom_clusters, bad_clusters, frg_clusters, good_clusters = classify_clusters(clusters)

	    # pp.pprint(hom_clusters)
	    # pp.pprint(bad_clusters)
	    # pp.pprint(frg_clusters)
	    # pp.pprint(good_clusters)

	    counts = (len(clusters), len(hom_clusters), len(bad_clusters),
	              len(frg_clusters), len(good_clusters))
	    print("Clusters Found:\n\tTotal: %d\n\tHomogeneous: %d\n\tBad: %d\n\tFragmented: %d\n\tGood: %d" % counts)

	    with open(pars[4], "a") as of:
	        of.write(format_row(epsilon, min_samples, counts))


	if __name__ == "__main__":
	    main(sys.argv)