Skip to content

Instantly share code, notes, and snippets.

@PatrykGala
Last active March 31, 2021 10:46
Show Gist options
  • Save PatrykGala/6b34a3f7a9bb3189ea8adc87835b97c1 to your computer and use it in GitHub Desktop.
Save PatrykGala/6b34a3f7a9bb3189ea8adc87835b97c1 to your computer and use it in GitHub Desktop.
Upload a large file to GCS with requests and the resumable upload API (Python 2.7 uploader, plus a Python 3.7 progress monitor)
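# Python 3.7 - progress monitor: polls a resumable upload session and prints how much has been uploaded so far.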
import requests
from datetime import datetime
import time
file_size = 2194418361937  # TODO size in bytes
LOCATION = "https://storage.googleapis.com/upload/storage/v1/b/sc-9369-dataengineering-prod-qeppo-arch/o?uploadType=resumable&name=****"  # TODO change location upload

# Poll the resumable-upload session every 5 seconds and print a timestamped
# line with the percentage uploaded and the remaining size in GB.
while True:
    # An empty PUT with "Content-Range: bytes */<total size>" queries the
    # session status without sending data. The total must be the actual
    # number, not the literal text "file_size".
    res = requests.put(
        url=LOCATION,
        headers={
            'Content-Length': '0',
            "Content-Range": "bytes */{}".format(file_size),
        })
    res.raise_for_status()

    # Per the GCS resumable-upload protocol, 200/201 means the upload is
    # complete; an incomplete upload answers 308 with an optional Range header.
    finished = res.status_code in (200, 201)
    if finished:
        persisted = file_size
    else:
        # The header looks like "bytes=0-<last byte index>"; it is absent when
        # no bytes have been persisted yet.
        range_header = res.headers.get('range')
        if range_header:
            # Last index is zero-based, so the byte count is index + 1.
            persisted = int(range_header.rsplit('-', 1)[1]) + 1
        else:
            persisted = 0

    percentage = persisted / file_size * 100
    remaining_gb = (file_size - persisted) / 1024 / 1024 / 1024
    print("{}{:.1f}% {:.2f}GB".format(
        datetime.now().strftime("%d-%m-%y %H:%M "), percentage, remaining_gb))

    if finished:
        # Nothing left to monitor; stop instead of polling forever.
        break
    time.sleep(5)
# Python 2.7 - uploader: starts a resumable upload session and sends the file in chunks.
from datetime import datetime
from requests.adapters import HTTPAdapter
from urllib3 import Retry
import requests
# Size in bytes of each chunk read from the file: the larger of
# 256 KiB (262144) and 100 MiB.
CHUNK_SIZE = max(256 * 1024, 100 * 1024 * 1024)
# Path of the local file that is opened for upload.
FILENAME = 'filename'  # TODO
# Token sent as "Bearer <TOKEN>" in the Authorization header.
TOKEN = "TOKEN"  # TODO
# Total object size in bytes, used as the total in Content-Range and for
# the progress percentage.
file_size = 2194418361937  # TODO
def read_in_chunks(file_object, chunk_size=CHUNK_SIZE):
    """Yield successive chunks of a file together with their byte offsets.

    Reads ``chunk_size`` units at a time from ``file_object`` until a read
    returns nothing.

    Yields:
        (data, first_offset, last_offset) where both offsets are zero-based
        and inclusive positions of ``data`` within the stream read so far.
    """
    next_start = 0
    data = file_object.read(chunk_size)
    while data:
        last = next_start + len(data) - 1
        yield data, next_start, last
        # The following chunk begins right after the one just handed out.
        next_start = last + 1
        data = file_object.read(chunk_size)
# Last percentage printed by progress(), formatted with one decimal place.
# '-1' never matches a formatted value, so the first call always prints.
progress_percentage = '-1'


def progress(percentage):
    """Print a timestamped progress line when the displayed value changes.

    The percentage is formatted to one decimal place; a line is printed only
    if that formatted text differs from the one printed last, so repeated
    calls with (nearly) the same value do not flood the output.

    Args:
        percentage: progress as a number (e.g. 12.3 for 12.3%).
    """
    global progress_percentage
    shown = "%.1f" % percentage
    # Compare formatted text with formatted text; comparing the stored string
    # with the raw float would never be equal and would print on every call.
    if shown == progress_percentage:
        return
    # Single-argument print(...) behaves the same on Python 2.7 and 3.x.
    print(datetime.now().strftime("%d-%m-%y %H:%M ") + shown + "%")
    progress_percentage = shown
# Retry policy for the HTTP adapter: up to 5 retries each for connection
# errors, read errors and bad statuses; HTTP 500 is the status that forces a
# retry, and an error is raised once status retries are exhausted.
retry_strategy = Retry(connect=5, read=5, status=5, status_forcelist=[500], raise_on_status=True)
adapter = HTTPAdapter(max_retries=retry_strategy)

# Use one session for the whole upload so connections are reused between
# chunks and every request goes through the retry-enabled adapter, instead of
# building a fresh session (and connection) per chunk.
with open(FILENAME, 'rb') as f, requests.Session() as session:
    session.mount("https://", adapter)

    # Start the resumable upload; the response's Location header is the
    # session URI that all chunk uploads are sent to.
    res = session.post(
        url='https://storage.googleapis.com/upload/storage/v1/b/sc-9369-dataengineering-prod-qeppo-arch/o?uploadType=resumable&name={path}_{filename}'.format(
            path=datetime.now().strftime("%d_%m_%y_%H_%M"), filename='filename'),  # TODO change filename
        headers={
            'X-Upload-Content-Type': 'text/csv',
            "Authorization": "Bearer " + TOKEN,
        })
    res.raise_for_status()
    location = res.headers['location']
    # Printed so the session URI can be reused elsewhere (e.g. for status checks).
    print(location)

    for piece, first_byte, last_byte in read_in_chunks(f):
        content_range = "bytes {first_byte}-{last_byte}/{file_size}".format(
            first_byte=first_byte, last_byte=last_byte, file_size=file_size)
        # Progress before sending: everything up to this chunk's first byte.
        progress(float(first_byte) / float(file_size) * float(100))
        response = session.put(location, data=piece, headers={
            "Content-Length": str(len(piece)),
            "Content-Range": content_range
        })
        # raise_for_status() only raises on 4xx/5xx responses.
        # NOTE(review): the Range header of the response is not inspected, so
        # a partially persisted chunk would go unnoticed - confirm acceptable.
        response.raise_for_status()
        # Progress after sending: this chunk is now included, so the final
        # chunk reports 100% when file_size matches the file.
        progress(float(last_byte + 1) / float(file_size) * float(100))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment