Last active
September 16, 2020 16:14
-
-
Save jaklinger/1c9596fcbc5a2c266506fede645ab1f3 to your computer and use it in GitHub Desktop.
Tells you whether a query is related to tech
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import wikipedia | |
from functools import lru_cache | |
@lru_cache()
def get_page_cats(query):
    """Return the category list of the Wikipedia page titled ``query``.

    The page is looked up with ``auto_suggest=False``. Results are memoised
    by ``lru_cache``, so a repeated query returns the same cached list
    object; callers should treat it as read-only.

    Returns:
        ``page.categories`` for the resolved page, or an empty list when
        ``wikipedia.page`` raises ``PageError`` or ``DisambiguationError``.
    """
    try:
        article = wikipedia.page(query, auto_suggest=False)
    except (wikipedia.PageError, wikipedia.DisambiguationError):
        # The title could not be resolved to a single page: report no categories.
        return []
    else:
        return article.categories
def is_tech(query, depth=0, max_depth=2):
    """Return whether ``query`` looks technology-related, judged by its categories.

    The categories of ``query`` are fetched with ``get_page_cats``. The query
    counts as tech when one of them is exactly 'Technology-related lists', or
    when any whitespace-separated word of any category equals 'computer',
    'computing', 'software' or 'hardware' (case-insensitive). Otherwise each
    category is itself checked recursively until ``max_depth`` is reached.

    Args:
        query: Title to look up.
        depth: Current recursion level; external callers normally leave it at 0.
        max_depth: Recursion level at which categories are still inspected
            but no longer recursed into.

    Returns:
        True if a tech indicator is found within the depth limit, else False.
    """
    # Titles containing any of these words are rejected before any lookup --
    # presumably to skip Wikipedia maintenance categories (TODO confirm intent).
    noise_words = {'containing', 'needing', 'need', 'dead', 'Wikipedia', 'unsourced'}
    if not noise_words.isdisjoint(query.split()):
        return False

    cats = get_page_cats(query)
    if 'Technology-related lists' in cats:
        return True

    tech_keywords = {'computer', 'computing', 'software', 'hardware'}
    if any(word.lower() in tech_keywords for cat in cats for word in cat.split()):
        return True

    # ``>=`` rather than ``==``: a call that starts with depth > max_depth
    # (or a negative max_depth) must still stop instead of recursing on.
    if depth >= max_depth:
        return False

    return any(
        is_tech(cat, depth=depth + 1, max_depth=max_depth) for cat in cats
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment