Created
April 19, 2018 07:29
-
-
Save frankgeerlings/ef16bc64c0dead960ea46795525030f1 to your computer and use it in GitHub Desktop.
Een script om de verwerking van de tennisvrouwencompetitie te vereenvoudigen
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pywikibot, mwparserfromhell, re | |
from pprint import pprint | |
# Title of the wiki page holding the ranking table; laadtennistabel() passes
# it to pywikibot.Page to fetch the table.
sourcepage = 'Gebruiker:Vinkje83/WTA150'
class Dame:
    """One player row of the ranking table plus the rank stored on her article.

    Attributes:
        nieuwe_rangpositie: int parsed from the first cell of the row.
        naam: title of the first wikilink found in the second cell.
        verandering: result of ``.title()`` on the first template in the
            fourth cell; callers compare it against ``'{{Winst}}'``.
            NOTE(review): presumably the title-cased template text - confirm.
        bestaande_rangpositie: rank read from the player's infobox.
            0 means "not found (yet)", -1 means "article has no infobox".
    """

    def __init__(self, rij):
        """Parse a table row.

        Args:
            rij: a parsed table-row tag whose contents consist of exactly
                four nodes (number, player, points, change). Any other
                node count raises ValueError on unpacking.
        """
        # Use the public ``contents`` property instead of the private
        # ``_contents`` attribute.
        nummer, speelster, punten, verandering = rij.contents.nodes
        # The number cell looks like "17. "; drop the ". " before converting.
        self.nieuwe_rangpositie = int(nummer.contents.strip_code().replace('. ', ''))
        self.naam = speelster.contents.filter_wikilinks()[0].title
        self.verandering = verandering.contents.filter_templates()[0].title()
        self.bestaande_rangpositie = 0

    @staticmethod
    def _parse_positie(positie_text):
        """Return the first run of digits in *positie_text* as int, or 0 if none."""
        gevonden = re.search(r'[0-9]+', positie_text)
        if gevonden is None:
            return 0
        return int(gevonden.group())

    def load_enkelhoogstepositie(self, site):
        """Read 'Enkelhoogstepositie' from the player's 'Infobox tennisspeler'.

        Sets ``self.bestaande_rangpositie`` to:
            -1 when the article contains no 'Infobox tennisspeler' template,
             0 when the infobox has no 'Enkelhoogstepositie' parameter or
               its value contains no digits,
             otherwise the first number in the value.

        Args:
            site: the pywikibot site on which the player's article lives.
        """
        p = pywikibot.Page(site, self.naam)
        wikicode = mwparserfromhell.parse(p.text)
        infoboxes = wikicode.filter_templates(
            matches=lambda t: t.name.matches('Infobox tennisspeler'))
        if not infoboxes:
            self.bestaande_rangpositie = -1
            return
        infobox = infoboxes[0]
        # A missing parameter used to raise ValueError from Template.get();
        # report it as "no existing rank" (0) instead of aborting the run.
        if not infobox.has('Enkelhoogstepositie'):
            self.bestaande_rangpositie = 0
            return
        # Example value: "17. (18 april 2018)"
        positie_text = infobox.get('Enkelhoogstepositie').value.strip_code()
        self.bestaande_rangpositie = self._parse_positie(positie_text)
def laadtennistabel(site):
    """Load the ranking table and return the players marked as '{{Winst}}'.

    Fetches the page named by ``sourcepage``, builds a ``Dame`` for every
    table row except the first one, keeps only those whose ``verandering``
    equals '{{Winst}}', and loads the stored infobox rank for each of them.

    Args:
        site: the pywikibot site to read from.

    Returns:
        list of ``Dame`` objects with ``bestaande_rangpositie`` filled in.
    """
    pagina = pywikibot.Page(site, sourcepage)
    geparsed = mwparserfromhell.parse(pagina.text)

    def is_rij(node):
        return node.tag == 'tr'

    rijen = geparsed.filter_tags(matches=is_rij)

    # The first row is skipped (presumably the table header).
    alle_dames = []
    for rij in rijen[1:]:
        alle_dames.append(Dame(rij))

    # Skip players with a loss or an unchanged rank; only gains remain.
    winnaars = []
    for dame in alle_dames:
        if dame.verandering == '{{Winst}}':
            winnaars.append(dame)

    for dame in winnaars:
        dame.load_enkelhoogstepositie(site)
    return winnaars
def main(*args):
    """Print the players whose infobox rank needs attention.

    Loads the '{{Winst}}' players from the ranking table on nl.wikipedia
    and pretty-prints a list of ``(name, reason)`` tuples for players that:
        - have no stored rank (``bestaande_rangpositie == 0``),
        - have a new rank number lower than the stored one,
        - have no infobox at all (``bestaande_rangpositie == -1``).

    Args:
        *args: command-line style arguments forwarded to
            ``pywikibot.handle_args``.
    """
    # Still called for its argument processing; its return value was never used.
    pywikibot.handle_args(args)
    site = pywikibot.Site(code='nl', fam='wikipedia')
    dames = laadtennistabel(site)

    # Compare ints with ==, not ``is``: identity of int objects is an
    # implementation detail and ``is <literal>`` warns on Python 3.8+.
    geenbox = [(dame.naam, 'Geen infobox')
               for dame in dames if dame.bestaande_rangpositie == -1]
    nietgevonden = [(dame.naam, 'Geen bestaande rangpositie')
                    for dame in dames if dame.bestaande_rangpositie == 0]
    # The sentinels -1 and 0 can never exceed a parsed rank, so they do not
    # show up here.
    gedaald = [(dame.naam, 'N < H (%d < %d)' % (dame.nieuwe_rangpositie, dame.bestaande_rangpositie))
               for dame in dames if dame.nieuwe_rangpositie < dame.bestaande_rangpositie]

    pprint(nietgevonden + gedaald + geenbox)
# Script entry point: run main() and report any uncaught exception through
# pywikibot's error logger (with traceback) instead of letting it propagate.
if __name__ == "__main__":
    try:
        main()
    except Exception:
        pywikibot.error("Fatal error:", exc_info=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment