Skip to content

Instantly share code, notes, and snippets.

@adalke
Last active August 28, 2021 19:19
Show Gist options
  • Save adalke/4f1ae8a55699edaccecb8bd2992307d8 to your computer and use it in GitHub Desktop.
Save adalke/4f1ae8a55699edaccecb8bd2992307d8 to your computer and use it in GitHub Desktop.
Find a set of 64-bit fingerprints with no ties in the NxN upper-triangular Tanimoto
# Find a set of 64-bit fingerprints with no ties in the NxN upper-triangular Tanimoto
# Andrew Dalke <dalke@dalkescientific.com>
# 28 August 2021
# Distributed under the terms of the MIT License
# https://opensource.org/licenses/MIT
# See also https://gist.github.com/ihaque/ee2f6de2091b5755219d8533b9bdfd15
import random
from chemfp import bitops
import time
"""
# Found 27 351
010ca5212bb73967 id1
222257801a012513 id2
0005683000080038 id3
efffff7fffffffff id4
fbfbdfffedfbdeff id5
21602a5034b064a0 id6
7d01cc7d85503009 id7
e7ff2efbfa723f4d id8
dfe9bcffddffefff id9
11eaf6836d1a1b13 id10
356c55eff7decef9 id11
b6bfdfda73f7faff id12
0200000000002122 id13
dfdd7fdf7f7dfeff id14
12449480c4c7cb1c id15
67efdeff75fbbe4a id16
d0ae48211cc31910 id17
77e23fb4dfd848b1 id18
53c4fffbe5fbf4fb id19
b51f9cbffede9f10 id20
0100200100000281 id21
f7f60d895cc39d5f id22
f997c59d6c75067a id23
eeffffffffffdf7f id24
0003d010581e0813 id25
914d85e6e9f9566b id26
9d6f3f77b74a779c id27
"""
def search(best, timeout):
t1 = time.time()
N = 64
all_bits = range(N)
### This regularly finds 23 fingerprints in 10 seconds
def get_fp():
n = random.randrange(1, N) # ignore all-on and all-off
return bitops.byte_from_bitlist(random.sample(all_bits, n), N)
### This is significantly worse.
### It finds about 18 fingerprints in 10 seconds.
## def get_fp():
## n = random.randrange(1, 2**64) # ignore all-on and all-off
## return n.to_bytes(8, "big")
# Choose the initial fingerprint.
seen = set()
fps = [get_fp()]
while 1:
t2 = time.time()
if (t2 - t1) > timeout:
break
# Make a new fingerprint
new_fp = get_fp()
# Check that all of its Tanimoto scores are unique
new_seen = set()
for prev_fp in fps:
t = bitops.byte_tanimoto(prev_fp, new_fp)
if t in seen:
break
if t in new_seen:
break
new_seen.add(t)
else:
# Add it
seen.update(new_seen)
fps.append(new_fp)
if len(fps) > best:
# Dump the output in FPS format
print("# Found", len(fps), len(seen))
for i, fp in enumerate(fps, 1):
print(fp.hex(), f"id{i}", sep="\t")
print()
assert len(fps) * (len(fps)-1) // 2 == len(seen)
best = len(fps)
return best
# Keep trying, with a longer and longer timeout.
def main():
best = 0
timeout = 10
while 1:
print("Search for at least:", best, "timeout:", timeout)
new_best = search(best, timeout)
if best == new_best:
timeout = timeout * 3 // 2
else:
best = new_best
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment