Last active
October 18, 2021 21:07
-
-
Save dexity/23232c394e707132b139fa70ccf9d801 to your computer and use it in GitHub Desktop.
Memory profiling for top key4Hashes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Profiles BigQuery results for Top N key4Hashes."""

# pip install memory_profiler
from memory_profiler import profile
from google.cloud import bigquery as bq

# SQL template: selects all columns from key4Grouped1 for rows with partId = 1
# whose key4Hash appears in key4_uniqHashes_1 with nModelsRank <= {top_n}.
# Fill it in with QUERY_TMPL.format(top_n=<integer>).
QUERY_TMPL = """SELECT *
FROM `phyndexer-production.phyndexer_production.key4Grouped1`
where partId = 1
and key4Hash in (
SELECT key4Hash
FROM `phyndexer-production.phyndexer_production.key4_uniqHashes_1`
where nModelsRank <= {top_n}
)"""
def format(items):
    """Convert row-like objects into a list of plain dictionaries.

    NOTE: this name shadows the builtin ``format`` inside this module; it is
    kept unchanged so existing callers continue to work.

    Args:
        items: Iterable of objects exposing ``key4Hash``, ``key4Count`` and
            ``md5s`` attributes.

    Returns:
        A list with one dict per item, in iteration order, with the keys
        ``'key4Hash'``, ``'key4Count'`` and ``'md5s'``.
    """
    return [
        {'key4Hash': item.key4Hash, 'key4Count': item.key4Count, 'md5s': item.md5s}
        for item in items
    ]
@profile
def mem_usage(n):
    """Run the top-N key4Hash query and convert its rows to dicts.

    The function is wrapped with memory_profiler's ``@profile`` decorator, so
    calling it is what triggers the memory report.

    Args:
        n: Upper bound for ``nModelsRank`` substituted into ``QUERY_TMPL``.
            Must be convertible to ``int``.

    Raises:
        ValueError, TypeError: If ``n`` cannot be converted to ``int``.
    """
    client = bq.Client()
    # Coerce to int so that only a numeric literal is ever spliced into the
    # SQL text (the template is filled with str.format, not query parameters).
    q_job = client.query(QUERY_TMPL.format(top_n=int(n)))
    result = q_job.result()
    # The converted rows are bound to a local but never returned; presumably
    # this keeps them alive so the profiler attributes their memory to this line.
    output = format(result)  # noqa: F841
if __name__ == '__main__':
    # When run as a script, profile the query with top_n = 1000.
    mem_usage(1000)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment