Last active
December 16, 2016 14:14
-
-
Save jochumdev/4c5b42dbdb4ea5c46e71919353e2d94e to your computer and use it in GitHub Desktop.
An algorythm to compare SQL Primary keys and sync A -> B.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import random | |
def compare(all_read_pks, all_write_pks): | |
"""This function emulates a mathematical problem i try to solve. | |
I have primary keys from a SQL Table A and primary_keys in SQL Table B. | |
This function should compare B with A and sync all primary_keys changes to B. | |
The biggest problem is here that i have to compare numbers in blocks and compare | |
a single block until its fully synced. | |
""" # noqa | |
block_size = 100 | |
compared = 0 | |
to_compare = len(all_read_pks) | |
if len(all_write_pks) > len(all_read_pks): | |
to_compare = len(all_write_pks) | |
while compared < to_compare: | |
read_pks = set(list(all_read_pks)[compared:compared + block_size]) | |
write_pks = set(list(all_write_pks)[compared:compared + block_size]) | |
if read_pks == write_pks: | |
compared += block_size | |
continue | |
matching = read_pks & write_pks | |
for i in write_pks - matching: | |
all_write_pks.remove(i) | |
for i in read_pks - write_pks: | |
all_write_pks.add(i) | |
if read_pks == write_pks: | |
compared += len(read_pks) | |
continue | |
return all_read_pks == all_write_pks | |
if __name__ == '__main__': | |
seta = set(range(1000)) | |
setb = set(range(1000)) | |
# Remove random rows from both sets "tables" | |
numbers1 = set(random.sample(range(1000), 500)) | |
numbers2 = set(random.sample(range(500, 1000), 250)) | |
# Compare with 500 random rows removed from both tables. | |
print("Remove in both is: ", compare(seta - numbers1, setb - numbers2)) | |
# Compare with 500 random rows removed from source only | |
print("Remove in source is: ", compare(seta - numbers1, setb)) | |
# Compare with 500 random rows removed from target only | |
print("Remove in target is: ", compare(seta, setb - numbers2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment