Skip to content

Instantly share code, notes, and snippets.

@LEW21
Created November 20, 2014 12:26
Show Gist options
  • Save LEW21/69556dd0a7cac5125655 to your computer and use it in GitHub Desktop.
Save LEW21/69556dd0a7cac5125655 to your computer and use it in GitHub Desktop.
Skrypt pobierający wszystkie dane PKW ze strony http://klk.kbw.gov.pl/kalkulator/20141116/000000/SMD/
import requests
import re
import time
import sys
def URL(teryt):
    """Build the kalkulator page URL for a TERYT code.

    Trailing "00" pairs are stripped from ``teryt`` first.  One path segment
    is then appended per remaining level: the first two characters padded
    with "0000", the first four characters padded with "00", and finally the
    stripped code itself.

    Args:
        teryt: Code string.  An empty string (or one consisting only of
            "00" pairs) yields the root URL with no extra segments.

    Returns:
        The page URL as a string.
    """
    # Normalise "020000" -> "02", "020100" -> "0201", so the length of the
    # remaining code tells how many path levels are needed.
    while teryt.endswith("00"):
        teryt = teryt[:-2]

    url = "http://klk.kbw.gov.pl/kalkulator/20141116/000000/SMD"
    if teryt:
        url += "/" + teryt[:2] + "0000"
    if len(teryt) > 2:
        url += "/" + teryt[:4] + "00"
    if len(teryt) > 4:
        url += "/" + teryt
    return url
def getOKW(teryt, okw):
    """Download the ZIP archive for one OKW and return its raw bytes.

    Every attempt first requests the OKW's kalkulator page and then the
    archive itself under ``/TMP/``.  (Presumably the page visit is what makes
    the server produce the archive -- this cannot be confirmed from this
    file.)  A failed attempt is followed by a one-second pause; at most 100
    attempts are made.

    Args:
        teryt: Code of the unit the OKW belongs to.
        okw: OKW identifier, used as a path segment and in the file name.

    Returns:
        The archive content as ``bytes``, or ``None`` when every attempt
        failed.
    """
    max_attempts = 100
    timeout = 60  # seconds; without a timeout a stalled connection never returns

    url = "http://klk.kbw.gov.pl/TMP/{0}/{1}/{0}-{1}.zip".format(teryt, okw)
    page_url = URL(teryt) + "/" + okw
    print(url, flush=True)

    for _ in range(max_attempts):
        try:
            requests.get(page_url, timeout=timeout)
            res = requests.get(url, timeout=timeout)
        except requests.RequestException:
            # Network-level failure: fall through to the retry pause below.
            # Anything else (KeyboardInterrupt, programming errors) is
            # deliberately allowed to propagate.
            pass
        else:
            if res.ok:
                print(res, flush=True)
                return res.content
        print("Error, sleeping 1s", flush=True)
        time.sleep(1)

    # All attempts exhausted.
    return None
def saveOKW(teryt, okw, start_from="026401"):
    """Download one OKW archive and write it to ``<teryt>-<okw>.zip``.

    The file is created in the current working directory.

    Args:
        teryt: Code of the unit the OKW belongs to.
        okw: OKW identifier.
        start_from: Units whose code compares (as a string) lower than this
            value are skipped without any download.  Defaults to the value
            that used to be hard-coded here; pass "" to skip nothing.

    Returns:
        None.
    """
    if teryt < start_from:
        return

    data = getOKW(teryt, okw)
    if data is None:
        # getOKW gave up after all its retries.  Opening the output file
        # first would leave an empty .zip behind and writing None would
        # raise TypeError, so report the failure and skip this archive.
        print("Download failed, skipping " + teryt + "-" + okw, flush=True)
        return

    with open(teryt + "-" + okw + ".zip", "wb") as f:
        f.write(data)
def ls(teryt):
    """List the entries linked from the kalkulator page for ``teryt``.

    The page is fetched and every ``href="..."`` attribute in its body is
    collected; only the part after the last "/" of each link is kept.

    Args:
        teryt: Code of the page to list; "" lists the root page.

    Returns:
        A list of strings, one per link found.  An empty list is returned
        when the server answers with an error status, so that links on an
        error page are never mistaken for child entries.
    """
    response = requests.get(URL(teryt))
    if not response.ok:
        print(response, flush=True)
        return []

    hrefs = re.findall(b'href="([^"]*)"', response.content)
    return [href.decode().split("/")[-1] for href in hrefs]
# Walk the listing four levels deep, starting at the root page, and save the
# archive for every (gmina, okw) pair found at the deepest level.  The
# generator is lazy, so each listing is fetched only when the walk reaches it
# and every archive is saved before the next listing request is made.
_targets = (
    (gmina, okw)
    for woj in ls("")
    for powiat in ls(woj)
    for gmina in ls(powiat)
    for okw in ls(gmina)
)
for _gmina, _okw in _targets:
    saveOKW(_gmina, _okw)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment