-
-
Save wy193777/0e2a4932e81afc6aa4c8f7a2984f34e2 to your computer and use it in GitHub Desktop.
"""
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>
"""
import os
import re

import requests
from tqdm import tqdm
def download_from_url(url, dst):
    """
    Download ``url`` to ``dst``, resuming a partial download when possible.

    If ``dst`` already exists, only the missing tail is requested with an
    HTTP Range header and appended to the file. If the server ignores the
    Range header (answers 200 instead of 206), the file is rewritten from
    the beginning so that no data is duplicated.

    @param url: url of the file to download
    @param dst: local path to put the file
    @return: total file size in bytes as reported by the server's
        Content-Length header, or -1 when the server does not report one
    @raise requests.HTTPError: if the server answers with an error status
    """
    # HEAD fetches only the headers; redirects are followed so the size
    # describes the real file and not the redirect response.
    head = requests.head(url, allow_redirects=True)
    file_size = int(head.headers.get('Content-Length', -1))

    first_byte = os.path.getsize(dst) if os.path.exists(dst) else 0
    # Only trust the comparison when the size is known; an unknown size (-1)
    # must not be mistaken for "already complete".
    if 0 <= file_size <= first_byte:
        return file_size

    # Open-ended range: the last-byte position is inclusive, so naming
    # file_size as the end would point one byte past the end of the file.
    header = {"Range": "bytes=%d-" % first_byte} if first_byte else {}

    with requests.get(url, headers=header, stream=True) as req:
        if req.status_code == 416:
            # Range not satisfiable: the local file already holds everything.
            return file_size
        # Never append an HTML error page to a partially downloaded file.
        req.raise_for_status()

        resuming = first_byte > 0 and req.status_code == 206
        if not resuming:
            # The full body is being sent, so start over instead of appending.
            first_byte = 0

        progress = tqdm(
            total=file_size if file_size >= 0 else None,
            initial=first_byte,
            unit='B', unit_scale=True, desc=url.split('/')[-1])
        with progress as pbar, open(dst, 'ab' if resuming else 'wb') as f:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # The final chunk is usually shorter than chunk_size.
                    pbar.update(len(chunk))
    return file_size
import requests
from tqdm import tqdm
import os
def download_from_url(url, dst):
    """
    Download ``url`` to ``dst``, resuming a partial download when possible.

    If ``dst`` already exists, only the missing tail is requested with an
    HTTP Range header and appended to the file. If the server ignores the
    Range header (answers 200 instead of 206), the file is rewritten from
    the beginning so that no data is duplicated.

    @param url: url of the file to download
    @param dst: local path to put the file
    @return: total file size in bytes as reported by the server's
        Content-Length header, or -1 when the server does not report one
    @raise requests.HTTPError: if the server answers with an error status
    """
    # requests.head() does not follow redirects by default; follow them so
    # the size describes the real file and not the redirect response.
    head = requests.head(url, allow_redirects=True)
    file_size = int(head.headers.get("Content-Length", -1))

    first_byte = os.path.getsize(dst) if os.path.exists(dst) else 0
    # Only trust the comparison when the size is known; an unknown size (-1)
    # must not be mistaken for "already complete".
    if 0 <= file_size <= first_byte:
        return file_size

    # Open-ended range: the last-byte position is inclusive, so naming
    # file_size as the end would point one byte past the end of the file.
    header = {"Range": "bytes=%d-" % first_byte} if first_byte else {}

    with requests.get(url, headers=header, stream=True) as req:
        if req.status_code == 416:
            # Range not satisfiable: the local file already holds everything.
            return file_size
        # Never append an HTML error page to a partially downloaded file.
        req.raise_for_status()

        resuming = first_byte > 0 and req.status_code == 206
        if not resuming:
            # The full body is being sent, so start over instead of appending.
            first_byte = 0

        progress = tqdm(
            total=file_size if file_size >= 0 else None,
            initial=first_byte,
            unit='B', unit_scale=True, desc=url.split('/')[-1])
        with progress as pbar, open(dst, 'ab' if resuming else 'wb') as f:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # The final chunk is usually shorter than chunk_size.
                    pbar.update(len(chunk))
    return file_size
Because I need to log in with a csrfmiddlewaretoken before downloading, I use the following code:
def download_from_url(session_requests, url, destination_folder):
    """
    Download ``url`` into ``destination_folder`` using an existing session.

    The file name is taken from the response's Content-Disposition header
    and the size from its Content-Length header. If a file with that name
    already exists in the folder, only the missing tail is requested with
    an HTTP Range header and appended; if the server ignores the Range
    header (answers 200 instead of 206), the file is rewritten from the
    beginning so that no data is duplicated.

    @param session_requests: session object whose ``get`` is used for the
        requests (created with ``requests.session()`` by the caller)
    @param url: url of the file to download
    @param destination_folder: folder to put the file in
    @return: file size in bytes as reported by the Content-Length header
    @raise KeyError: if Content-Disposition or Content-Length is missing
    @raise IndexError: if Content-Disposition carries no ``filename=``
    @raise requests.HTTPError: if the server answers with an error status
    """
    # The first request is only used for its headers. Close it explicitly:
    # an unread streamed response otherwise keeps its connection checked out.
    with session_requests.get(
            url, stream=True, headers=dict(referer=url)) as result:
        disposition = result.headers['content-disposition']
        size = int(result.headers["Content-Length"])

    # Stop at a quote or semicolon so that optional quoting and trailing
    # parameters do not end up in the file name.
    name = re.findall(r'filename="?([^";]+)', disposition)[0].strip()
    # The name comes from the server: drop any directory components so it
    # cannot point outside destination_folder.
    name = os.path.basename(name)

    dst = os.path.join(destination_folder, name)
    first_byte = os.path.getsize(dst) if os.path.isfile(dst) else 0
    if first_byte >= size:
        return size

    # Open-ended range: the last-byte position is inclusive, so naming
    # the size as the end would point one byte past the end of the file.
    header = {"Range": "bytes=%d-" % first_byte} if first_byte else {}

    with session_requests.get(url, headers=header, stream=True) as req:
        # Never append an HTML error page to a partially downloaded file.
        req.raise_for_status()

        resuming = first_byte > 0 and req.status_code == 206
        if not resuming:
            # The full body is being sent, so start over instead of appending.
            first_byte = 0

        progress = tqdm(
            total=size,
            initial=first_byte,
            unit='B',
            unit_scale=True,
            desc=name)
        with progress as pbar, open(dst, 'ab' if resuming else 'wb') as f:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # The final chunk is usually shorter than chunk_size.
                    pbar.update(len(chunk))
    return size
where session_requests is:
# Create one session and use it for both the login and the download, so the
# download request is made through the same session object that logged in.
session_requests = requests.session()
# authentication and login section (site-specific, omitted here)
# ...
# ...
# url and folder_to_download_to must be defined by the caller beforehand.
download_from_url(session_requests, url, folder_to_download_to)
Add unit_divisor=1024 to the pbar = tqdm(...) call so that the progress bar scales sizes by 1024 instead of the default 1000 and shows the precise file size.
Since you already imported requests, I would replace the urlopen call with a requests call: