Often I hear people claiming S3 is slow, when in fact S3 is very fast; it's the user-space program you are using that is slow!
This gist provides a tool: rapid_s3_upload.sh
which will upload the directory you run the tool in to a provided S3 bucket, as fast as humanly possible.
This tool will saturate your NICs, will saturate your CPUs, and will upload data to S3 faster than basically anything else.
rapid_s3_upload.sh s3://some-path/in-s3
Furthermore, if you want to use a custom compressor you can run it with the COMPRESS parameter.

COMPRESS="$(which lz4)" rapid_s3_upload.sh s3://some-path/in-s3
Under the hood this tool uses xargs to get maximum CPU parallelism, find to locate all files in the current working directory, and either zstd or lz4 to do the compression. Generally speaking the default zstd compressor is the right choice, but lz4 is useful when you have less compressible data and want to use less CPU. Currently the script doesn't support turning off compression, but I think it's pretty straightforward to refactor as you wish.
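To make the find/xargs/compressor plumbing concrete, here is a minimal sketch of what such a pipeline could look like. This is not the gist's exact code: the function name, the streaming `aws s3 cp -` destination layout, and the COMPRESS default shown here are my assumptions for illustration.

```shell
# Sketch only: assumed internals, not the actual rapid_s3_upload.sh source.
rapid_s3_upload() {
  DEST="${1:?usage: rapid_s3_upload s3://bucket/prefix}"
  COMPRESS="${COMPRESS:-$(command -v zstd)}"   # COMPRESS=... swaps in lz4 etc.
  export DEST COMPRESS

  upload_one() {
    # Compress to stdout and stream straight into S3; "-" tells the AWS CLI
    # to read the object body from stdin, so nothing is staged on local disk.
    "$COMPRESS" -c "$1" | aws s3 cp - "$DEST/${1#./}.${COMPRESS##*/}"
  }
  export -f upload_one

  # -print0/-0 survive spaces in filenames; -P "$(nproc)" runs one worker
  # per core, which is what saturates the CPUs and the NIC.
  find . -type f -print0 | xargs -0 -P "$(nproc)" -I{} bash -c 'upload_one "$1"' _ {}
}
```

Streaming through `aws s3 cp -` is the key design choice: each worker compresses and uploads in one pass, so throughput is bounded by cores and network rather than disk.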