Last active
August 17, 2022 09:20
-
-
Save z3nth10n/654895e91685dcad1e270e8201ed18d4 to your computer and use it in GitHub Desktop.
Centro de Descargas CNIG
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import aiofiles | |
import aiohttp | |
from progress.bar import Bar | |
import time | |
# from syncer import sync | |
import asyncio | |
async def async_http_download(descargas, chunk_size=65536):
    """Download each entry of *descargas* in turn, showing a progress bar.

    Every entry is a dict providing ``src_url`` (URL that receives the POST),
    ``payload`` (form data sent with it), ``tamano`` (expected size in bytes,
    used as the progress-bar maximum) and ``dest_file`` (path the response
    body is written to).

    Args:
        descargas: Iterable of download descriptors as described above.
        chunk_size: Maximum number of bytes read from the response per step.
    """
    mib = 1024 ** 2
    # A single session serves the whole batch so connections can be reused
    # instead of being torn down and re-established for every file.
    async with aiohttp.ClientSession() as session:
        for item in descargas:
            total_mb = round(item['tamano'] / mib, 2)
            with Bar(f"Descargando {item['dest_file']}", max=item['tamano']) as bar:
                async with aiofiles.open(item['dest_file'], 'wb') as fd:
                    async with session.post(item['src_url'], data=item['payload']) as resp:
                        downloaded = 0
                        # Wall-clock timer, restarted for every file.
                        # time.process_time() only counts CPU time, which
                        # barely advances while waiting on the network and
                        # therefore inflates the reported transfer rate.
                        started = time.monotonic()
                        async for chunk in resp.content.iter_chunked(chunk_size):
                            await fd.write(chunk)
                            size = len(chunk)
                            downloaded += size
                            done_mb = round(downloaded / mib, 2)
                            elapsed = time.monotonic() - started
                            # Guard against a zero interval on the very first chunk.
                            rate = round(downloaded / elapsed / mib, 2) if elapsed > 0 else 0.0
                            # The %(...)s placeholders are filled in by the progress bar itself.
                            bar.suffix = f"%(percent)d%% {rate} MB/s | {done_mb} MB of {total_mb} MB | %(eta)d s"
                            bar.next(size)
def main(page, timeout=60):
    """Download the matching files listed on one results page of the CNIG site.

    POSTs the search form for series ``MDSNE``, scans the returned HTML for
    result rows, keeps the files whose name contains
    ``NDSM-EDIFICACION-ETRS89-H30-`` and hands them to
    ``async_http_download``.

    Args:
        page: Results page number, sent as the ``numPagina`` form field.
        timeout: Seconds to wait for the listing request before giving up.

    Raises:
        requests.RequestException: If the listing request fails, times out,
            or comes back with an HTTP error status.
    """
    url_lidar = "http://centrodedescargas.cnig.es/CentroDescargas/resultadosArchivos"
    payload = {
        'geom': None,
        'coords': '',
        'numPagina': page,
        'numTotalReg': 1180,
        'codSerie': 'MDSNE',
        'series': 'MDSNE',
        'codProvAv': '',
        'codIneAv': '',
        'codComAv': '',
        'numHojaAv': '',
        'todaEsp': 'N',
        'todoMundo': 'S',
        'tipoBusqueda': 'AV',
        'tipoArchivo': '',
        'contiene': '',
        'subSerieExt': '',
        'codSubSerie': '',
        'idProcShape': '',
        'orderBy': '',
    }
    # Without a timeout a stalled server would block the script forever, and
    # without the status check an error page would be parsed as if it were
    # an (empty) result list.
    response = requests.post(url_lidar, payload, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    descargas = []
    for el in soup.select('a.link'):
        # Only links whose fourth ancestor is a table row are results; walk
        # up step by step so shallow links are skipped instead of raising
        # AttributeError on a missing parent.
        row = el
        for _ in range(4):
            row = row.parent
            if row is None:
                break
        if row is None or row.name != 'tr':
            continue

        name_cell = row.select_one('td.txtLeft')
        size_cell = row.select_one('td.txtRight')
        if name_cell is None or size_cell is None:
            continue

        archivo = name_cell.text
        # Filter on the name first so size and id are parsed only for rows
        # that will actually be downloaded.
        if 'NDSM-EDIFICACION-ETRS89-H30-' not in archivo:
            continue

        descargas.append({
            'src_url': 'http://centrodedescargas.cnig.es/CentroDescargas/descargaDir',
            'payload': {
                # The link id has the form "<prefix>_<number>"; the number
                # is what the download endpoint expects.
                'secuencialDescDir': int(el['id'].split('_')[1]),
                'aceptCodsLicsDD_0': 15,
            },
            # Scaled by 1024**2 to get bytes; presumably the site lists
            # the size in MB -- confirm against the page.
            'tamano': int(float(size_cell.text) * 1024 ** 2),
            'dest_file': archivo,
        })

    # Nothing to do when the page holds no matching files.
    if descargas:
        asyncio.run(async_http_download(descargas))
# Process results pages 5 through 20, one page at a time.
for page_number in range(5, 21):
    main(page_number)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment