Skip to content

Commit 185df12

Browse files
rayluvineetgopal
authored andcommitted
Initial commit
0 parents  commit 185df12

File tree

8 files changed

+449
-0
lines changed

8 files changed

+449
-0
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
__pycache__/
2+
/build/
3+
/dist/
4+
/sqlalchemy_batch_inserts.egg-info/

Dockerfile

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
FROM postgres:9.6.9
2+
3+
RUN apt-get update \
4+
&& DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip \
5+
&& rm -r /var/lib/apt/lists/*
6+
RUN pip3 install --quiet psycopg2-binary sqlalchemy

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2019 Benchling
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
Benchling uses SQLAlchemy and psycopg2 to talk to PostgreSQL.
2+
To save on round-trip latency, we batch our inserts using this code.
3+
4+
In summary, committing 100 models in SQLAlchemy does 100 roundtrips
5+
to the database if the model has an autoincrementing primary key.
6+
This module improves this to 2 roundtrips without requiring any
7+
other changes to your code.
8+
9+
## Usage
10+
11+
```python
12+
from sqlalchemy import create_engine
13+
from sqlalchemy.orm import sessionmaker
14+
from sqlalchemy_batch_inserts import enable_batch_inserting
15+
16+
engine = create_engine("postgresql+psycopg2://postgres@localhost", use_batch_mode=True)
17+
Session = sessionmaker(bind=engine)
18+
session = Session()
19+
enable_batch_inserting(session)
20+
```
21+
22+
If you use [Flask-SQLAlchemy](https://flask-sqlalchemy.palletsprojects.com/), pass its `SignallingSession` instead:
23+
24+
```python
25+
from flask_sqlalchemy import SignallingSession
26+
from sqlalchemy_batch_inserts import enable_batch_inserting
27+
28+
enable_batch_inserting(SignallingSession)
29+
```
30+
31+
## Demo
32+
33+
```
34+
docker build -t sqla_batch .
35+
docker run --rm -v $PWD:/src --name sqla_batch sqla_batch
36+
# Wait for it to finish spinning up
37+
# Switch to another shell
38+
docker exec -it sqla_batch src/demo.py no 100
39+
docker exec -it sqla_batch src/demo.py yes 100
40+
```
41+
42+
To simulate 100 * 3 inserts with 20 ms latency,
43+
first change the connection string in demo.py from
44+
`postgresql+psycopg2://postgres@localhost` to `postgresql+psycopg2://postgres@db`.
45+
Then,
46+
```
47+
docker network create sqla_batch
48+
docker run --rm --network sqla_batch --network-alias db --name db sqla_batch
49+
# Switch to another shell
50+
docker run -it -v /var/run/docker.sock:/var/run/docker.sock --network sqla_batch gaiaadm/pumba netem --duration 15m --tc-image gaiadocker/iproute2 delay --time 20 --jitter 0 db
51+
# Switch to another shell
52+
# This should take 100 * 3 * 20 ms = 6 seconds
53+
docker run -it --rm -v $PWD:/src --network sqla_batch sqla_batch src/demo.py no 100
54+
docker run -it --rm -v $PWD:/src --network sqla_batch sqla_batch src/demo.py yes 100
55+
```
56+
57+
## Maintainer notes
58+
59+
After bumping the `version` in `setup.py` and `__version__` in `__init__.py`, build and upload the release:
60+
61+
```
62+
$ ./setup.py sdist bdist_wheel # Generate source and py3 wheel
63+
$ python2 setup.py bdist_wheel # Generate py2 wheel
64+
$ twine upload --repository-url https://test.pypi.org/legacy/ dist/*
65+
# Check https://test.pypi.org/project/sqlalchemy-batch-inserts/
66+
$ twine upload dist/*
67+
```

demo.py

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/usr/bin/env python3
2+
3+
from contextlib import contextmanager
4+
import os
5+
import sys
6+
import time
7+
8+
from sqlalchemy import create_engine, Column, ForeignKey, Integer, String
9+
from sqlalchemy.orm import relationship, sessionmaker
10+
from sqlalchemy.ext.declarative import declarative_base
11+
12+
from sqlalchemy_batch_inserts import enable_batch_inserting
13+
14+
Base = declarative_base()
15+
16+
class User(Base):
    """ORM model mapped to the ``users`` table."""

    __tablename__ = "users"

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Unique user name.
    name = Column(String, unique=True)

    def __repr__(self):
        # Format with a one-element tuple: a bare ``% value`` would be
        # unpacked as several format arguments if the value were a tuple.
        return "<User(name=%r)>" % (self.name,)
24+
25+
class Address(Base):
    """ORM model mapped to the ``addresses`` table."""

    __tablename__ = "addresses"

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Required and unique e-mail address.
    email_address = Column(String, nullable=False, unique=True)
    # Foreign key to the owning row in ``users``.
    user_id = Column(Integer, ForeignKey("users.id"))

    # Many-to-one link to the owning User (no reverse attribute is declared).
    user = relationship("User")

    def __repr__(self):
        # Format with a one-element tuple: a bare ``% value`` would be
        # unpacked as several format arguments if the value were a tuple.
        return "<Address(email_address=%r)>" % (self.email_address,)
35+
36+
37+
def main():
    """Insert ``count`` users with two addresses each and time the commit.

    Command line: ``demo.py {yes|no} COUNT``. The first argument switches
    batch inserting on or off; the second is the number of users to create.
    Any invalid argument prints the usage text and exits with status 1.
    """
    if len(sys.argv) != 3:
        usage_and_exit()

    batch_choices = {"yes": True, "no": False}
    if sys.argv[1] not in batch_choices:
        usage_and_exit()
    batch = batch_choices[sys.argv[1]]

    try:
        count = int(sys.argv[2])
    except ValueError:
        usage_and_exit()
    if count < 0:
        # A negative count would silently insert nothing; treat it as misuse.
        usage_and_exit()

    # Echo the generated SQL only for small runs so the output stays readable.
    engine = create_engine(
        "postgresql+psycopg2://postgres@localhost",
        use_batch_mode=True,
        echo=count <= 100,
    )
    Base.metadata.create_all(engine)

    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        if batch:
            enable_batch_inserting(session)

        # Prefix names with the current timestamp so repeated runs do not
        # collide on the unique name / e-mail columns.
        prefix = str(int(time.time()))
        for i in range(count):
            name = "%s-%d" % (prefix, i)
            user = User(name=name)
            # Link each address through Address.user so user_id is populated;
            # User declares no ``addresses`` relationship of its own.
            # NOTE(review): the e-mail domains were unreadable in the reviewed
            # copy; these are placeholders. The two addresses must differ
            # because email_address is unique.
            addresses = [
                Address(email_address="%s@example.com" % name, user=user),
                Address(email_address="%s@example.org" % name, user=user),
            ]
            session.add(user)
            session.add_all(addresses)

        # Time only the commit, which is where the INSERTs are sent.
        with Timer() as timer:
            session.commit()

        users = session.query(User).count()
        addresses = session.query(Address).count()
        print("took", timer.duration, "seconds")
        print("have", users, "users")
        print("have", addresses, "addresses")
    finally:
        # Always release the connection, even if the commit fails.
        session.close()
78+
79+
class Timer:
    """Context manager that measures how long its ``with`` body takes.

    ``duration`` is ``None`` until the block exits; afterwards it holds the
    elapsed time in seconds as a float. Exceptions raised inside the block
    are not suppressed.
    """

    def __init__(self):
        self.start = None
        self.duration = None

    def __enter__(self):
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # wall-clock adjustments while the block runs.
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, value, tb):
        self.duration = time.perf_counter() - self.start
        # Returning False lets any exception from the block propagate.
        return False
86+
87+
def usage_and_exit():
    """Write the usage text to stderr and exit with status 1.

    Both arguments are required, so the synopsis uses ``{yes|no} COUNT``
    rather than square brackets, which conventionally mark optional ones.
    """
    prog = sys.argv[0]
    sys.stderr.write("usage: " + prog + " {yes|no} COUNT\n")
    sys.stderr.write(prog + " no 50\n")
    sys.stderr.write(prog + " yes 1000\n")
    sys.exit(1)
92+
93+
# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()

requirements.txt

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
psycopg2-binary
2+
sqlalchemy

setup.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/env python3
2+
3+
import setuptools
4+
5+
# Markdown shown on the package index page; keep it in sync with README.md.
long_description = """
Benchling uses SQLAlchemy and psycopg2 to talk to PostgreSQL.
To save on round-trip latency, we batch our inserts using this code.

## Usage

```
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy_batch_inserts import enable_batch_inserting

engine = create_engine("postgresql+psycopg2://postgres@localhost", use_batch_mode=True)
Session = sessionmaker(bind=engine)
session = Session()
enable_batch_inserting(session)
```

If you use [Flask-SQLAlchemy](https://flask-sqlalchemy.palletsprojects.com/),

```
from flask_sqlalchemy import SignallingSession
from sqlalchemy_batch_inserts import enable_batch_inserting

enable_batch_inserting(SignallingSession)
```
"""

setuptools.setup(
    name="sqlalchemy_batch_inserts",
    version="0.0.1",
    author="Vineet Gopal",
    # NOTE(review): the address below appears redacted in the reviewed copy;
    # confirm the real maintainer address before publishing.
    author_email="[email protected]",
    description="Batch inserts for SQLAlchemy on PostgreSQL with psycopg2",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/benchling/sqlalchemy_batch_inserts",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)

0 commit comments

Comments
 (0)