Skip to content

Instantly share code, notes, and snippets.

@mconcas
Last active January 18, 2018 09:16
Show Gist options
  • Save mconcas/721c7a0f43a49f089ec4e640592b52ff to your computer and use it in GitHub Desktop.
Save mconcas/721c7a0f43a49f089ec4e640592b52ff to your computer and use it in GitHub Desktop.
Usage: `./clusterer.py [filename] [epsilon] [min_samples] [output]`
#! /usr/bin/env python
import pprint
import sys
from collections import Counter, defaultdict

import numpy as np
from sklearn.cluster import dbscan
# Index, within a pixel row laid out as [x, y, pID, MCl], of the Monte Carlo
# label used to decide whether a cluster is homogeneous.
MC_LABEL_COLUMN = 3


def group_by_label(pixels, cl_labels):
    """Group pixel rows by the cluster label assigned to each of them.

    Args:
        pixels: sequence of pixel rows, each laid out as [x, y, pID, MCl].
        cl_labels: sequence of cluster labels, one per pixel row and in the
            same order as ``pixels``.

    Returns:
        A dict ``{cluster_label: [pixel, ...]}`` where each pixel is a plain
        list copy of the corresponding row.
    """
    clusters = defaultdict(list)
    for pixel, label in zip(pixels, cl_labels):
        clusters[label].append(list(pixel))
    return dict(clusters)


def classify_clusters(clusters):
    """Split clusters into bad, homogeneous, fragmented and good ones.

    * bad: the pixels of the cluster do not all carry the same MC label;
    * homogeneous: every pixel of the cluster carries the same MC label;
    * fragmented: homogeneous clusters whose MC label is shared with at least
      one other homogeneous cluster;
    * good: homogeneous clusters that are not fragmented.

    Args:
        clusters: dict ``{cluster_label: [pixel, ...]}`` with non-empty pixel
            lists, as produced by ``group_by_label``.

    Returns:
        A tuple ``(bad, homogeneous, fragmented, good)`` of dicts with the
        same layout as ``clusters``.
    """
    bad_clusters = {}
    hom_clusters = {}
    for label, members in clusters.items():
        mc_labels = [pixel[MC_LABEL_COLUMN] for pixel in members]
        if all(mc == mc_labels[0] for mc in mc_labels[1:]):
            hom_clusters[label] = members
        else:
            bad_clusters[label] = members

    # Count how many homogeneous clusters carry each MC label: a label seen
    # more than once means the same MC object was split across clusters.
    # This replaces a pairwise comparison of all homogeneous clusters.
    mc_counts = Counter(
        members[0][MC_LABEL_COLUMN] for members in hom_clusters.values()
    )

    frg_clusters = {}
    good_clusters = {}
    for label, members in hom_clusters.items():
        if mc_counts[members[0][MC_LABEL_COLUMN]] > 1:
            frg_clusters[label] = members
        else:
            good_clusters[label] = members

    return bad_clusters, hom_clusters, frg_clusters, good_clusters


def main(argv=None):
    """Cluster the pixels of a file with DBSCAN and report cluster quality.

    Command line: ``./clusterer.py [filename] [epsilon] [min_samples] [output]``.
    A human-readable summary is printed and a one-line record
    ``eps min_samples total homogeneous bad fragmented good`` is appended to
    the output file.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.
    """
    pars = sys.argv if argv is None else argv
    if len(pars) != 5:
        print("\tUsage: ./clusterer.py [filename] [epsilon] [min_samples] [output]")
        sys.exit(-1)

    eps = float(pars[2])
    min_samples = int(pars[3])

    # ndmin=2 keeps a single-row input file as a (1, n_columns) array instead
    # of collapsing it to 1-D, so rows can always be handled uniformly.
    pixels = np.loadtxt(pars[1], ndmin=2)

    # NOTE(review): the clustering runs over every column of the input,
    # including pID and the MC label, not only (x, y) -- confirm this is
    # intended.
    _, cl_labels = dbscan(pixels, eps=eps, min_samples=min_samples,
                          metric='euclidean')

    # NOTE(review): DBSCAN marks noise samples with the label -1; they are
    # grouped and counted here like any other cluster -- confirm this is
    # intended.
    clusters = group_by_label(pixels, cl_labels)
    bad_clusters, hom_clusters, frg_clusters, good_clusters = \
        classify_clusters(clusters)

    counts = (len(clusters), len(hom_clusters), len(bad_clusters),
              len(frg_clusters), len(good_clusters))
    print("Clusters Found:\n\tTotal: %d\n\tHomogeneous: %d\n\tBad: %d\n\tFragmented: %d\n\tGood: %d" % counts)

    # The file is opened in append mode, so every record must end with a
    # newline; otherwise successive runs fuse into one unparseable line.
    with open(pars[4], "a") as of:
        of.write("%.4f %d %d %d %d %d %d\n" % ((eps, min_samples) + counts))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment