@e-mon
Last active September 10, 2019 02:29

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook

# FP16 (half-precision model wrapper) and LABELS (path to the labels csv) are
# assumed to come from the surrounding fastai notebook; they are not defined
# in this gist.


def re_ranking(model, probFea, galFea, k1, k2, lambda_value):
    # k-reciprocal re-ranking (in the style of Zhong et al., CVPR 2017), with
    # the pairwise distances produced by the model's own metric head m.get_d,
    # which is expected to return a 1-D numpy array of distances from one
    # embedding to every embedding in the stacked query+gallery matrix.
    query_num = probFea.shape[0]
    all_num = query_num + galFea.shape[0]
    feat = np.append(probFea, galFea, axis=0)
    feat = feat.astype(np.float16)
    feat = torch.from_numpy(feat).half().cuda()

    print('computing original distance')
    sz = feat.shape[0]
    d = []
    model.eval()
    with torch.no_grad():
        # unwrap FP16 / DataParallel wrappers to reach the underlying module
        m = model.module if isinstance(model, FP16) else model
        m = m.module if isinstance(m, nn.DataParallel) else m
        for i in tqdm_notebook(range(sz)):
            preds = m.get_d(feat[i], feat)
            d.append(preds)
    original_dist = np.stack(d)
    del feat

    gallery_num = original_dist.shape[0]
    original_dist = np.transpose(original_dist / np.max(original_dist, axis=0))
    V = np.zeros_like(original_dist).astype(np.float16)
    initial_rank = np.argsort(original_dist).astype(np.int32)

    print('starting re_ranking')
    for i in range(all_num):
        # k-reciprocal neighbours of sample i
        forward_k_neigh_index = initial_rank[i, :k1 + 1]
        backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
        fi = np.where(backward_k_neigh_index == i)[0]
        k_reciprocal_index = forward_k_neigh_index[fi]
        # expand the set with the k1/2-reciprocal neighbours of each candidate
        k_reciprocal_expansion_index = k_reciprocal_index
        for j in range(len(k_reciprocal_index)):
            candidate = k_reciprocal_index[j]
            candidate_forward_k_neigh_index = initial_rank[candidate, :int(np.around(k1 / 2)) + 1]
            candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index, :int(np.around(k1 / 2)) + 1]
            fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0]
            candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate]
            if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > (2 / 3) * len(candidate_k_reciprocal_index):
                k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index)
        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
        # Gaussian-weighted encoding of the expanded neighbourhood
        weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = weight / np.sum(weight)

    original_dist = original_dist[:query_num, :]
    if k2 != 1:
        # local query expansion: average each encoding over its k2 nearest neighbours
        V_qe = np.zeros_like(V, dtype=np.float16)
        for i in range(all_num):
            V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
        V = V_qe
        del V_qe
    del initial_rank

    # inverted index: for each gallery column, the rows whose encoding is non-zero there
    invIndex = []
    for i in range(gallery_num):
        invIndex.append(np.where(V[:, i] != 0)[0])

    # Jaccard distance between the k-reciprocal encodings of queries and gallery
    jaccard_dist = np.zeros_like(original_dist, dtype=np.float16)
    for i in range(query_num):
        temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16)
        indNonZero = np.where(V[i, :] != 0)[0]
        indImages = [invIndex[ind] for ind in indNonZero]
        for j in range(len(indNonZero)):
            temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
        jaccard_dist[i] = 1 - temp_min / (2 - temp_min)

    # blend the Jaccard distance with the original distance
    final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value
    del original_dist
    del V
    del jaccard_dist
    final_dist = final_dist[:query_num, query_num:]
    print('re-rank done..')
    return final_dist
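
# --- Usage sketch (not part of the original gist; names are assumptions) ---
# re_ranking takes the query and gallery embeddings as numpy arrays (it does
# the float16/CUDA conversion itself) and a model whose get_d(query_vec, feats)
# returns distances; learner is the trained fastai Learner from the notebook.
#
#   dist = re_ranking(learner.model, val_preds, trn_preds,
#                     k1=16, k2=3, lambda_value=0.3)
#   # dist has shape (num_query, num_gallery); smaller means more similar
#   top5 = dist.argsort(axis=1)[:, :5]
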
def get_val_reciprocal_nbs(model, emb_df, out='val.csv', dcut=None, k=16):
    # For each validation image, find its k re-ranked nearest neighbours in the
    # training split, write them to a csv, and (if dcut is given) print MAP@5
    # with 'new_whale' inserted once the distance exceeds dcut.
    emb_df = emb_df.copy()
    data = pd.read_csv(LABELS).set_index('Image')
    emb_df['emb'] = [[float(i) for i in s.split()] for s in emb_df['emb']]
    emb_df.set_index('files', inplace=True)
    train_df = data.join(emb_df)
    train_df = train_df.reset_index()
    # the split must be the same as the one used for training
    trn_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)
    trn_preds = np.array(trn_df.emb.tolist())
    val_preds = np.array(val_df.emb.tolist())
    trn_df = trn_df.reset_index()
    val_df = val_df.reset_index()

    # re_ranking converts the embeddings to float16 CUDA tensors itself, so
    # they are passed as numpy arrays; rows of ds = validation images,
    # columns = training images
    ds = re_ranking(model, val_preds, trn_preds, k1=16, k2=3, lambda_value=0.3)
    argsorted = ds.argsort()
    trn_idxs = argsorted[:, :k]
    trn_d = np.vstack([ds[i, trn_idxs[i, :]] for i in range(trn_idxs.shape[0])])

    # write the k nearest training neighbours and their distances to a csv
    dist_strs = []
    for l1 in trn_d.tolist():
        dist_strs.append(' '.join([str(l2) for l2 in l1]))
    val_df['d'] = dist_strs
    val_df['nbs'] = [' '.join(trn_df.loc[trn_idxs[index]].Id.tolist())
                     for index, row in val_df.iterrows()]
    val_df[['Image', 'Id', 'nbs', 'd']].to_csv(out, header=True, index=False)

    if dcut is not None:
        scores = []
        for idx in val_df.index:
            l0 = val_df.loc[idx].Id
            # build up to 5 predictions in order of increasing distance
            nbs = dict()
            for i in range(k):
                nb = trn_idxs[idx, i]
                l, s = trn_df.loc[nb].Id, trn_d[idx, i]
                if s > dcut and 'new_whale' not in nbs:
                    nbs['new_whale'] = dcut
                if l not in nbs:
                    nbs[l] = s
                if len(nbs) >= 5:
                    break
            nbs_sorted = list(nbs.items())  # insertion order == prediction order
            score = 0.0
            for i in range(min(len(nbs), 5)):
                if nbs_sorted[i][0] == l0:
                    score = 1.0 / (i + 1.0)
                    break
            scores.append(score)
        print(np.array(scores).mean(), flush=True)
    return
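
# --- Usage sketch (not part of the original gist; names are assumptions) ---
# emb_df is expected to hold one row per training image, with a 'files' column
# matching the 'Image' column of the LABELS csv and an 'emb' column of
# space-separated floats; learner is the trained fastai Learner.
#
#   get_val_reciprocal_nbs(learner.model, emb_df, out='val_rerank.csv', dcut=1.0, k=16)
#
# With dcut set, the printed value is MAP@5 on the validation split, where
# 'new_whale' is inserted into the prediction list once the re-ranked distance
# to the next neighbour exceeds dcut.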