Last active
September 14, 2015 16:21
-
-
Save paultsw/592482b68d5a76ec574a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function dist(p,q) { | |
/* | |
Euclidean distance function between two points. | |
*/ | |
return Math.sqrt((p.x - q.x)^2, (p.y - q.y)^2); | |
} | |
function average(datapts) { | |
/* | |
Takes an array of datapoints, where each | |
datapoint is of the form {"x": Number, "y": Number}, | |
and returns the average point in the middle. | |
*/ | |
var x_mid = datapts.reduce(function(a,b) { | |
return a.x + b.x; | |
}, 0); | |
var y_mid = datapts.reduce(function(a,b) { | |
return a.y + b.y; | |
}, 0); | |
return { | |
"x": (x_mid / datapts.length), | |
"y": (y_mid / datapts.length) | |
}; | |
} | |
function assign(centroids, assignments, datapts) { | |
/* | |
Takes: | |
a list of centroids of length k, each a JSON containing x and y coords.; | |
a list of assignments, each entry an integer from 0 to k-1, | |
of the same length as datapts; | |
a list of datapts, of length n, each a JSON containing x and y coords. | |
Returns: | |
an updated assignments list that performs one iteration of re-assignment. | |
*/ | |
// list of centroid assignments for each datapoint: | |
var newAssignments = []; | |
for (var j = 0; j < datapts.length; j++) { | |
// compute the distance between the datapoint and centroids: | |
var currAssn = assignments[j]; | |
var distances = centroids.map(function (centroid) { | |
return dist(datapts[j],centroid); | |
}); | |
// find the index that minimizes distance | |
var minIndex = distances.indexOf(Math.min.apply(Math,distances)); | |
// append to new assignments | |
newAssignments.push(minIndex); | |
} | |
// return new assignments list | |
return newAssignments; | |
} | |
function kmeans(k, data) { | |
/* | |
Actual k-means algorithm, implemented. | |
*/ | |
// create k random datapoints to act as the centroids | |
var centroids = []; | |
for (var i = 0; i < k; i++) { | |
centroids.push({ | |
"x": Math.random() * 100, | |
"y": Math.random() * 100 | |
}); | |
} | |
// generate initial list of centroid assignments for each datapoint: | |
var nList = new Array(data.length + 1).join('0').split('').map(parseFloat); // empty array of zeroes | |
var assignments = assign(centroids, nList, data); | |
// repeat re-centering process until it stabilizes | |
var stable = false; | |
while (!stable) { | |
var currAssignments = assignments; | |
// assign each datapoint to its closest centroid: | |
assignments = assign(centroids, assignments, data); | |
// move centroids to average position of all their assigned datapts: | |
centroids.map(function (centroid, index) { | |
// get list of all datapoints that a given centroid owns: | |
var assignedDataPts = []; | |
for (var i = 0; i < data.length; i++) { | |
if (assignments[i] == index) { | |
assignedDataPts.push(data[i]); | |
} | |
} | |
// return computed averages: | |
return average(assignedDataPts); | |
}); | |
// check if stable: | |
if (currAssignments == assignments) | |
stable = true; | |
} | |
// return results | |
return { | |
"centroids": centroids, | |
"assignments": assignments, | |
"datapts": data | |
}; | |
} | |
(function () { | |
/* | |
Run demonstration and print to console. | |
*/ | |
// example dataset; all numbers bounded between 0 and 100. | |
// data points chosen to be surround two clusters. | |
var data = [ | |
{ | |
"x": 23, | |
"y": 32 | |
}, | |
{ | |
"x": 66, | |
"y": 11 | |
}, | |
{ | |
"x": 25, | |
"y": 35 | |
}, | |
{ | |
"x": 20, | |
"y": 29 | |
}, | |
{ | |
"x": 28, | |
"y": 28 | |
}, | |
{ | |
"x": 70, | |
"y": 10 | |
}, | |
{ | |
"x": 59, | |
"y": 13 | |
}, | |
{ | |
"x": 25, | |
"y": 33 | |
} | |
]; | |
console.log(kmeans(2,data)); | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment