Last active
August 28, 2021 19:19
-
-
Save adalke/4f1ae8a55699edaccecb8bd2992307d8 to your computer and use it in GitHub Desktop.
Find a set of 64-bit fingerprints with no ties in the NxN upper-triangular Tanimoto
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find a set of 64-bit fingerprints with no ties in the NxN upper-triangular Tanimoto | |
# Andrew Dalke <dalke@dalkescientific.com> | |
# 28 August 2021 | |
# Distributed under the terms of the MIT License | |
# https://opensource.org/licenses/MIT | |
# See also https://gist.github.com/ihaque/ee2f6de2091b5755219d8533b9bdfd15 | |
import random | |
from chemfp import bitops | |
import time | |
""" | |
# Found 27 351 | |
010ca5212bb73967 id1 | |
222257801a012513 id2 | |
0005683000080038 id3 | |
efffff7fffffffff id4 | |
fbfbdfffedfbdeff id5 | |
21602a5034b064a0 id6 | |
7d01cc7d85503009 id7 | |
e7ff2efbfa723f4d id8 | |
dfe9bcffddffefff id9 | |
11eaf6836d1a1b13 id10 | |
356c55eff7decef9 id11 | |
b6bfdfda73f7faff id12 | |
0200000000002122 id13 | |
dfdd7fdf7f7dfeff id14 | |
12449480c4c7cb1c id15 | |
67efdeff75fbbe4a id16 | |
d0ae48211cc31910 id17 | |
77e23fb4dfd848b1 id18 | |
53c4fffbe5fbf4fb id19 | |
b51f9cbffede9f10 id20 | |
0100200100000281 id21 | |
f7f60d895cc39d5f id22 | |
f997c59d6c75067a id23 | |
eeffffffffffdf7f id24 | |
0003d010581e0813 id25 | |
914d85e6e9f9566b id26 | |
9d6f3f77b74a779c id27 | |
""" | |
def search(best, timeout): | |
t1 = time.time() | |
N = 64 | |
all_bits = range(N) | |
### This regularly finds 23 fingerprints in 10 seconds | |
def get_fp(): | |
n = random.randrange(1, N) # ignore all-on and all-off | |
return bitops.byte_from_bitlist(random.sample(all_bits, n), N) | |
### This is significantly worse. | |
### It finds about 18 fingerprints in 10 seconds. | |
## def get_fp(): | |
## n = random.randrange(1, 2**64) # ignore all-on and all-off | |
## return n.to_bytes(8, "big") | |
# Choose the initial fingerprint. | |
seen = set() | |
fps = [get_fp()] | |
while 1: | |
t2 = time.time() | |
if (t2 - t1) > timeout: | |
break | |
# Make a new fingerprint | |
new_fp = get_fp() | |
# Check that all of its Tanimoto scores are unique | |
new_seen = set() | |
for prev_fp in fps: | |
t = bitops.byte_tanimoto(prev_fp, new_fp) | |
if t in seen: | |
break | |
if t in new_seen: | |
break | |
new_seen.add(t) | |
else: | |
# Add it | |
seen.update(new_seen) | |
fps.append(new_fp) | |
if len(fps) > best: | |
# Dump the output in FPS format | |
print("# Found", len(fps), len(seen)) | |
for i, fp in enumerate(fps, 1): | |
print(fp.hex(), f"id{i}", sep="\t") | |
print() | |
assert len(fps) * (len(fps)-1) // 2 == len(seen) | |
best = len(fps) | |
return best | |
# Keep trying, with a longer and longer timeout. | |
def main(): | |
best = 0 | |
timeout = 10 | |
while 1: | |
print("Search for at least:", best, "timeout:", timeout) | |
new_best = search(best, timeout) | |
if best == new_best: | |
timeout = timeout * 3 // 2 | |
else: | |
best = new_best | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment