Skip to content

Instantly share code, notes, and snippets.

@bosukh
Created November 26, 2017 07:27
Show Gist options
  • Save bosukh/cc6b432306309364a275bb1cbace152d to your computer and use it in GitHub Desktop.
Save bosukh/cc6b432306309364a275bb1cbace152d to your computer and use it in GitHub Desktop.
# CREATE USER 'example'@'localhost' IDENTIFIED BY 'example_password';
# GRANT ALL PRIVILEGES ON *.* TO 'example2'@'localhost' IDENTIFIED BY 'example_password';
# CREATE DATABASE example;
import sys
import os
import time
from datetime import date, timedelta
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.dialects.mysql import DATETIME, DATE
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.sql.expression import bindparam
Base = declarative_base()
DBSession = scoped_session(sessionmaker())
engine = None
dbname = "mysql+mysqldb://example:example_password@127.0.0.1:3306/example"
# Generate some dates for birth_date columns
date_list = []
date_str_list = []
today = date.today() - timedelta(days = 1000)
j = 1
for i in range(1000):
j += i
j = j % 40000
date_list.append(today - timedelta(days = j))
date_str_list.append((today - timedelta(days = j)).strftime('%Y-%m-%d'))
class Customer(Base):
__tablename__ = "customer"
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(String(50), nullable=False)
date_of_birth = Column(DATE, nullable=False)
def init_sqlalchemy(remove=True):
global engine
engine = create_engine(dbname, echo=False)
DBSession.remove()
DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
if remove:
# this will remove the exisitng customer table and create a new one.
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
def test_sqlalchemy_orm_update(n=100000):
init_sqlalchemy(False)
t0 = time.time()
customer = Customer.__table__
for i in xrange(1, n+1):
stmt = customer.update().where(customer.c.id==i).values(
{'name': 'NEW_NAME ' + str(i)}
)
DBSession.execute(stmt)
DBSession.commit()
print(
"SQLAlchemy ORM: Total time for " + str(n) +
" records " + str(time.time() - t0) + " secs")
def test_sqlalchemy_orm_update_in_chuck(n=100000):
init_sqlalchemy(False)
t0 = time.time()
customer = Customer.__table__
stmt = customer.update().where(customer.c.id == bindparam('_id')).values(
{'name': bindparam('name')}
)
DBSession.execute(stmt, [{'name': 'NEW_NAME ' + str(i), '_id': i} for i in xrange(1, n+1)])
DBSession.commit()
print(
"SQLAlchemy ORM list: Total time for " + str(n) +
" records " + str(time.time() - t0) + " secs")
def test_sqlalchemy_orm_bulk_update(n=100000):
init_sqlalchemy(False)
t0 = time.time()
DBSession.bulk_update_mappings(
Customer,
[{'name': 'NEW_NAME ' + str(i), 'id': i}
for i in xrange(1, n+1)]
)
DBSession.commit()
print(
"SQLAlchemy ORM bulk_update_mappings(): Total time for " + str(n) +
" records " + str(time.time() - t0) + " secs")
def test_sqlalchemy_core_update(n=100000):
init_sqlalchemy(False)
t0 = time.time()
engine.execute(
Customer.__table__.update(),
[{'name': 'NEW_NAME ' + str(i), '_id': i}
for i in xrange(1, n+1)]
)
print(
"SQLAlchemy Core: Total time for " + str(n) +
" records " + str(time.time() - t0) + " secs")
def insert(n=100000):
init_sqlalchemy(True)
t0 = time.time()
engine.execute(
Customer.__table__.insert(),
[
{"name": 'NAME ' + str(i),
'date_of_birth': date_list[i%len(date_list)]}
for i in xrange(n)
]
)
def test_mysql_update(n=100000):
init_sqlalchemy(False)
t0 = time.time()
for i in xrange(n):
DBSession.execute(
"UPDATE customer SET name = '{0}' WHERE '{1}'".format(
'NEW_NAME ' + str(i), str(i)
)
)
DBSession.commit()
print(
"mysql: Total time for " + str(n) +
" records " + str(time.time() - t0) + " sec")
def test_raw_update(n=100000):
init_sqlalchemy(False)
t0 = time.time()
f = open('customers_to_insert', 'w')
for i in xrange(1, n+1):
f.write(
"{0},{1}\n".format(
i, 'NEW_NAME ' + str(i)
)
)
f.close()
os.system(
'./raw_update_script.sh \
{file_name} {table_name} {db_name} {username} {password}'.format(
file_name = 'customers_to_insert',
table_name = 'customer',
db_name = 'example',
username = 'example',
password = 'example_password'
)
)
os.remove('./customers_to_insert')
print(
"mysql raw update: Total time for " + str(n) +
" records " + str(time.time() - t0) + " sec")
if __name__ == '__main__':
try:
n = int(sys.argv[1])
except (ValueError, IndexError) as e:
n = 500
insert(n)
test_sqlalchemy_orm_update(n)
insert(n)
test_sqlalchemy_orm_update_in_chuck(n)
insert(n)
test_sqlalchemy_core_update(n)
insert(n)
test_mysql_update(n)
insert(n)
test_sqlalchemy_orm_bulk_update(n)
insert(n)
test_raw_update(n)
@vvksahoo
Copy link

Hi bosukh, great example . It saved my lot of times. I have one doubt at "test_sqlalchemy_orm_bulk_update " , here you are having 'id' as primary key or may be you are querying based on 'id' and updating 'name' for that 'id'. In my case 'id' is not a primary key and i want to query based on "X"(Let suppose x is a field and its primary key) , so how do i do bulk update using 'bulk_update_mappings' ?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment