Skip to content

Instantly share code, notes, and snippets.

@robintux
Last active September 4, 2024 12:48
Show Gist options
  • Save robintux/e9e5b4b35b7454de190dd7227dfeff23 to your computer and use it in GitHub Desktop.
Save robintux/e9e5b4b35b7454de190dd7227dfeff23 to your computer and use it in GitHub Desktop.
import warnings
import random
import numpy as np
from sklearn.neighbors import NearestNeighbors
def smote(T, N, K):
    """
    Generate synthetic minority-class samples with the SMOTE algorithm.

    T ~ an array-like object representing the minority matrix, one sample
        per row and one feature per column.
    N ~ the percent oversampling you want. e.g. 500 will give you 5 synthetic
        samples per row of T. It should be a multiple of 100; any remainder
        is discarded, i.e. N // 100 samples are generated per row.
    K ~ K Nearest Neighbors: every synthetic sample is interpolated between
        a row of T and one of its K nearest *other* rows. When T has fewer
        than K + 1 rows, all the other rows are used as candidates.

    Returns a float64 array of shape ((N // 100) * nsamples, nfeatures). The
    samples synthesised from row i of T are stored contiguously, starting at
    row i * (N // 100).

    Raises ValueError if T is not 2-D, T has fewer than two rows, N < 100,
    or K < 1.

    Randomness comes from the standard-library ``random`` module, so seed it
    with ``random.seed`` for reproducible output.
    """
    ## make sure T is an array with the proper dimensions
    T = np.asarray(T, dtype=np.float64)
    if T.ndim != 2:
        raise ValueError("T must be a 2-D array of shape (nsamples, nfeatures)")
    nsamples, nfeatures = T.shape
    if nsamples < nfeatures:
        warnings.warn("Make sure the features are in the columns.")

    ## we want to oversample
    if N < 100:
        raise ValueError("N should be at least 100")
    if K < 1:
        raise ValueError("K should be at least 1")
    if nsamples < 2:
        raise ValueError("T needs at least two samples to interpolate between")

    # Floor division keeps the count an int (a float would break both the
    # array shape and the row indexing below).
    per_sample = int(N) // 100

    # A point cannot be its own neighbour, so at most nsamples - 1 exist.
    n_neighbors = min(int(K), nsamples - 1)
    nn = NearestNeighbors(n_neighbors=n_neighbors)
    nn.fit(T)
    # Querying without an explicit X returns the neighbours of every fitted
    # point and leaves the point itself out of its own neighbour list, so no
    # "don't pick itself" rejection loop is needed.
    neighbors = nn.kneighbors(return_distance=False)

    synthetic = np.zeros([per_sample * nsamples, nfeatures])
    for sample in range(nsamples):
        base = T[sample]
        for newindex in range(per_sample):
            chosen = neighbors[sample, random.randrange(n_neighbors)]
            # An independent gap is drawn for every feature, so the new point
            # lies in the axis-aligned box spanned by the two samples.
            gaps = np.array([random.uniform(0, 1) for _ in range(nfeatures)])
            synthetic[per_sample * sample + newindex] = base + gaps * (T[chosen] - base)
    return synthetic
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment