Last active
June 17, 2020 02:52
-
-
Save jdeng/d2c538e4cab6dd75bf34 to your computer and use it in GitHub Desktop.
clustering by fast search and find of density peak
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// generate [0..n-1] | |
auto seq = [](size_t n) -> std::vector<size_t> { | |
std::vector<size_t> v(n); | |
for (size_t i=0; i<n; ++i) v[i] = i; | |
return v; | |
}; | |
auto index = seq(n); | |
// n * n distance matrix | |
std::vector<D> dists(n * n); | |
for (size_t i=0; i<n-1; ++i) { | |
for (size_t j=i+1; j<n; ++j) { dists[i * n + j] = dists[j * n + i] = distf(values[i], values[j]); } | |
} | |
auto dist = [&](size_t i, size_t j) { return dists[i * n + j]; } | |
// calculate rho & delta | |
std::vector<size_t> rho(n); | |
for (size_t i=0; i<n; ++i) { | |
rho[i] = std::count_if(index.begin(), index.end(), [&](size_t j) { return dist(i,j) < dist_cutoff; }); | |
} | |
std::vector<D> delta(n); | |
for (size_t i=0; i<n; ++i) { | |
auto it = std::min_element_if(index.begin(), index.end(), [&](size_t j, size_t k) { return dist(i,j) < dist(i,k); }, [&](size_t j) { return rho[j] > rho[i]; }); | |
if (it == index.end()) | |
it = std::max_element(index.begin(), index.end(), [&](size_t j, size_t k) { return dist(i,j) < dist(i,k); }); | |
delta[i] = dist(i, *it); | |
} | |
//... | |
// clustering | |
auto dindex = seq(n*n); | |
std::sort(dindex.begin(), dindex.end(), [&](size_t i, size_t j) { return dists[i] < dists[j]; }); | |
while (true) { | |
auto it = std::find_if(dindex.begin(), dindex.end(), [&](size_t x) { | |
size_t i = x / n, j = x % n; | |
return (i != j) && ((labels[i] != -1 && labels[j] == -1 && rho[i] > rho[j]) || (labels[i] == -1 && labels[j] != -1 && rho[j] > rho[i])); | |
}); | |
if (it == dindex.end()) break; | |
size_t x = *it, i = x / n, j = x % n; | |
if (labels[i] != -1) labels[j] = labels[i]; | |
else labels[i] = labels[j]; | |
} |
这是什么语言,什么语法?
C++ 11, FYI
这个可以计算的点的数量的极限是多少?大约在那个量级,如果是在大约5000个点,效率会如何?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The clustering could be implemented more efficiently with for example a priority queue.