Skip to content

Instantly share code, notes, and snippets.

@alarrosa14
Last active November 12, 2015 01:07
Show Gist options
  • Save alarrosa14/29441e55c53e42b562cc to your computer and use it in GitHub Desktop.
Save alarrosa14/29441e55c53e42b562cc to your computer and use it in GitHub Desktop.
A Kmeans implementation using posix threads. Generates a Octave script to plot the output.
#include <pthread.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include <float.h>
#include <stdio.h>
#include <sys/types.h>
#define CANT_THREADS 8
#define N 300000
#define K 100
#define TOLERANCE 0.00001
#define SEED 11
typedef struct {
double x;
double y;
} Point;
Point* data;
Point* means;
int* groupForPoint;
double error;
int global_p, global_k;
pthread_mutex_t mutex_iterators, mutex_error;
pthread_barrier_t barrier;
void generateOctaveScript(int step) {
int i, j, first;
printf("# STEP %d\n", step);
printf("function step%d()\n", step);
for (i = 0; i < K; i++) {
first = 0;
printf("means(%d, :) = [%f %f];\n", i+1, means[i].x, means[i].y);
for (j = 0; j < N; j++) {
if (groupForPoint[j] == i) {
if (first == 0){
first++;
printf("data%d = [", i+1);
}
printf("[%f %f] ; ", data[j].x, data[j].y);
}
}
printf("];\n");
}
printf("plot(");
for (i = 1; i <= K; i++) {
printf("means(%d, 1), means(%d, 2), '.', data%d(:,1), data%d(:,2), '+'", i, i, i, i);
if (i < K)
printf(",");
}
printf(");\n");
printf("endfunction\n");
}
double distance(Point p1, Point p2) {
Point diff;
diff.x = p1.x - p2.x;
diff.y = p1.y - p2.y;
return (double) sqrt(diff.x*diff.x + diff.y*diff.y);
}
void* threadedFunction(void* args) {
int k;
int closerGroup;
double dist, minDist;
Point pAdd;
int n, p;
pthread_mutex_lock(&mutex_iterators);
while (global_p < N) {
p = global_p;
global_p = global_p + 1;
pthread_mutex_unlock(&mutex_iterators);
minDist = DBL_MAX;
closerGroup = 0;
for (k = 0; k < K; k++) {
dist = distance(data[p], means[k]);
if (dist < minDist) {
minDist = dist;
closerGroup = k;
}
}
pthread_mutex_lock(&mutex_error);
error += minDist;
pthread_mutex_unlock(&mutex_error);
groupForPoint[p] = closerGroup;
pthread_mutex_lock(&mutex_iterators);
}
pthread_mutex_unlock(&mutex_iterators);
pthread_barrier_wait(&barrier);
pthread_mutex_lock(&mutex_iterators);
while (global_k < K) {
k = global_k++;
pthread_mutex_unlock(&mutex_iterators);
pAdd.x = 0; pAdd.y = 0;
n = 0;
for (p = 0; p < N; p++) {
if (groupForPoint[p] == k) {
pAdd.x += data[p].x;
pAdd.y += data[p].y;
n++;
}
}
if (n > 0) {
means[k].x = pAdd.x / n;
means[k].y = pAdd.y / n;
}
pthread_mutex_lock(&mutex_iterators);
}
pthread_mutex_unlock(&mutex_iterators);
pthread_barrier_wait(&barrier);
pthread_exit(0);
}
int kmeans() {
pthread_t kmeans_t[CANT_THREADS];
pthread_mutex_init(&mutex_iterators, NULL);
pthread_mutex_init(&mutex_error, NULL);
pthread_barrier_init(&barrier, NULL, CANT_THREADS);
int i;
double prevError;
error = DBL_MAX;
groupForPoint = (int*) malloc(N*sizeof(int));
int iter;
iter= 0;
do {
//printf("Step %d\n", iter);
prevError = error;
error = 0;
global_p = 0;
global_k = 0;
for (i=0; i<CANT_THREADS; i++) {
int err = pthread_create(&kmeans_t[i], NULL, threadedFunction, NULL);
if (err != 0)
printf("Error creando el thread %d\n", i);
}
pthread_join(kmeans_t[0], NULL);
iter++;
} while (fabs(error - prevError) > TOLERANCE);
pthread_mutex_destroy(&mutex_iterators);
pthread_mutex_destroy(&mutex_error);
pthread_barrier_destroy(&barrier);
return iter;
}
int main(void) {
int i;
data = (Point*)malloc(N*sizeof(Point));
means = (Point*)malloc(K*sizeof(Point));
srand(SEED);
/* Cargamos puntos */
for (i = 0; i < N; i++) {
data[i].x = (double)rand()/RAND_MAX;
data[i].y = (double)rand()/RAND_MAX;
}
/* Inicializamos las medias */
for (i = 0; i < K; i++) {
means[i].x = (double)rand()/RAND_MAX;
means[i].y = (double)rand()/RAND_MAX;
}
int iterations = kmeans();
generateOctaveScript(iterations);
free(groupForPoint);
free(data);
free(means);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment