markloyman · May 2, 2020 11:13
diff --git a/hubness.py b/hubness.py
 import numpy as np
 from scipy.stats import skew


 def k_occurrences(nbrs_indices):
    """
    Calculate the k-occurrences of every point.

        Miloš Radovanović et al,
        Hubs in space: Popular nearest neighbors in high-dimensional data.
        Journal of Machine Learning Research, 11(Sep):2487–2531, 2010
    :param nbrs_indices: array-like of non-negative ints, shape (n_queries, k_neighbors)
            Indices of the nearest points in the population matrix.
            if using sklearn:
                nbrs = sklearn.neighbors.NearestNeighbors(..).fit(..)
                distances, indices = nbrs.kneighbors(..)
                nbrs_indices = indices[:, 1:]
    :return: 1d int array where k_occ[i] is the number of times point i appears
            in nbrs_indices. For 2-D input the result has at least n_queries
            entries, so points that are never selected are reported as 0.
    """
    indices = np.asarray(nbrs_indices)
    # Without minlength, bincount stops at the largest index that occurs, so
    # trailing points that are nobody's neighbour would be silently dropped
    # instead of being counted as zero-occurrence points.
    n_points = indices.shape[0] if indices.ndim == 2 else 0
    return np.bincount(indices.ravel(), minlength=n_points)


 def hubness(nbrs_indices, k_neighbors=None, nonlinear_rescaling=True):
    """
    hubness: skewness (standardized 3rd moment) of the k-occurrences

        Miloš Radovanović et al,
        Hubs in space: Popular nearest neighbors in high-dimensional data.
        Journal of Machine Learning Research, 11(Sep):2487–2531, 2010
    :param nbrs_indices: array, shape (n_queries, k_neighbors)
            Indices of the nearest points in the population matrix.
            if using sklearn:
                nbrs = sklearn.neighbors.NearestNeighbors(..).fit(..)
                distances, indices = nbrs.kneighbors(..)
                nbrs_indices = indices[:, 1:]
    :param k_neighbors: if provided, take the first k columns of nbrs_indices
    :param nonlinear_rescaling: if True, return exp(-|skewness|) instead of the
            raw skewness
    :return: Tuple: hubness value, k-occurrences distribution
            (distribution[j] is the number of points whose k-occurrence is j)
    """
    nbrs_indices_ = np.asarray(nbrs_indices)
    if k_neighbors is not None:
        nbrs_indices_ = nbrs_indices_[:, :k_neighbors]
    k_occ = k_occurrences(nbrs_indices_)
    # Histogram of the k-occurrences, returned to the caller for inspection.
    distribution = np.bincount(k_occ)
    # The skewness must be taken over the per-point k-occurrences themselves;
    # taking it over the histogram heights measures something else entirely.
    index = skew(k_occ)
    if nonlinear_rescaling:
        index = np.exp(-np.abs(index))
    return index, distribution
	import numpy as np
	from scipy.stats import skew


	def k_occurrences(nbrs_indices):
	    """
	    Calculate the k-occurrences of every point.

	        Miloš Radovanović et al,
	        Hubs in space: Popular nearest neighbors in high-dimensional data.
	        Journal of Machine Learning Research, 11(Sep):2487–2531, 2010
	    :param nbrs_indices: array-like of non-negative ints, shape (n_queries, k_neighbors)
	            Indices of the nearest points in the population matrix.
	            if using sklearn:
	                nbrs = sklearn.neighbors.NearestNeighbors(..).fit(..)
	                distances, indices = nbrs.kneighbors(..)
	                nbrs_indices = indices[:, 1:]
	    :return: 1d int array where k_occ[i] is the number of times point i appears
	            in nbrs_indices. For 2-D input the result has at least n_queries
	            entries, so points that are never selected are reported as 0.
	    """
	    indices = np.asarray(nbrs_indices)
	    # Without minlength, bincount stops at the largest index that occurs, so
	    # trailing points that are nobody's neighbour would be silently dropped
	    # instead of being counted as zero-occurrence points.
	    n_points = indices.shape[0] if indices.ndim == 2 else 0
	    return np.bincount(indices.ravel(), minlength=n_points)


	def hubness(nbrs_indices, k_neighbors=None, nonlinear_rescaling=True):
	    """
	    hubness: skewness (standardized 3rd moment) of the k-occurrences

	        Miloš Radovanović et al,
	        Hubs in space: Popular nearest neighbors in high-dimensional data.
	        Journal of Machine Learning Research, 11(Sep):2487–2531, 2010
	    :param nbrs_indices: array, shape (n_queries, k_neighbors)
	            Indices of the nearest points in the population matrix.
	            if using sklearn:
	                nbrs = sklearn.neighbors.NearestNeighbors(..).fit(..)
	                distances, indices = nbrs.kneighbors(..)
	                nbrs_indices = indices[:, 1:]
	    :param k_neighbors: if provided, take the first k columns of nbrs_indices
	    :param nonlinear_rescaling: if True, return exp(-|skewness|) instead of the
	            raw skewness
	    :return: Tuple: hubness value, k-occurrences distribution
	            (distribution[j] is the number of points whose k-occurrence is j)
	    """
	    nbrs_indices_ = np.asarray(nbrs_indices)
	    if k_neighbors is not None:
	        nbrs_indices_ = nbrs_indices_[:, :k_neighbors]
	    k_occ = k_occurrences(nbrs_indices_)
	    # Histogram of the k-occurrences, returned to the caller for inspection.
	    distribution = np.bincount(k_occ)
	    # The skewness must be taken over the per-point k-occurrences themselves;
	    # taking it over the histogram heights measures something else entirely.
	    index = skew(k_occ)
	    if nonlinear_rescaling:
	        index = np.exp(-np.abs(index))
	    return index, distribution