Skip to content

Commit 56c300e

Browse files
committedMar 9, 2012
Make mongodbtools pip installable
1 parent 5f5b313 commit 56c300e

14 files changed

+491
-270
lines changed
 

‎.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@
22
*.pyc
33
distribute-0.6.10.tar.gz
44
virtualenv
5+
build
6+
dist
7+
mongodbtools.egg-info

‎AUTHORS.md

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
## Authors
2+
* Jason Wilder

‎LICENSE

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Copyright (c) 2012 Jason Wilder
2+
3+
Permission is hereby granted, free of charge, to any person obtaining
4+
a copy of this software and associated documentation files (the
5+
"Software"), to deal in the Software without restriction, including
6+
without limitation the rights to use, copy, modify, merge, publish,
7+
distribute, sublicense, and/or sell copies of the Software, and to
8+
permit persons to whom the Software is furnished to do so, subject to
9+
the following conditions:
10+
11+
The above copyright notice and this permission notice shall be
12+
included in all copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

‎collection-stats.py

-99
This file was deleted.

‎index-stats.py

-116
This file was deleted.
File renamed without changes.

‎mongodbtools/collection_stats.py

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
This script prints some basic collection stats about the size of the
5+
collections and their indexes.
6+
"""
7+
8+
from prettytable import PrettyTable
9+
import psutil
10+
from pymongo import Connection
11+
from pymongo import ReadPreference
12+
13+
connection = Connection(read_preference=ReadPreference.SECONDARY)
14+
15+
def compute_signature(index):
    """
    Build a string signature for an index document.

    The signature is the index namespace (``index["ns"]``) followed by one
    ``<field>_<value>`` fragment for every entry of ``index["key"]``, in the
    iteration order of that mapping.
    """
    namespace = index["ns"]
    fragments = ["%s_%s" % (field, value)
                 for field, value in index["key"].items()]
    return namespace + "".join(fragments)
20+
21+
def get_collection_stats(database, collection):
    """
    Report which collection is being inspected and fetch its statistics.

    Prints a progress line with the collection's full name, then runs the
    ``collstats`` command for ``collection.name`` on ``database`` and returns
    whatever that command returns.
    """
    progress = "Checking DB: %s" % collection.full_name
    print(progress)
    stats = database.command("collstats", collection.name)
    return stats
24+
25+
# From http://www.5dollarwhitebox.org/drupal/node/84
26+
def convert_bytes(bytes):
    """
    Format a byte count as a short human-readable string.

    The value is scaled by powers of 1024 and rendered with two decimals and
    a one-letter suffix: ``T``, ``G``, ``M``, ``K``, or ``b`` for plain bytes.
    The unit is chosen from the magnitude of the value, so negative amounts
    (for example a negative RAM headroom when the indexes are larger than
    physical memory) are scaled too instead of always being shown in bytes.

    bytes -- the size to format; anything accepted by ``float()``.

    Returns the formatted string, e.g. ``'1.50K'`` or ``'-2.00G'``.
    """
    # The parameter name shadows the builtin; it is kept unchanged so that
    # callers passing it by keyword keep working.
    value = float(bytes)
    magnitude = abs(value)
    units = (
        (1099511627776, 'T'),
        (1073741824, 'G'),
        (1048576, 'M'),
        (1024, 'K'),
    )
    # Largest unit first: the first threshold the magnitude reaches wins.
    for factor, suffix in units:
        if magnitude >= factor:
            return '%.2f%s' % (value / factor, suffix)
    return '%.2fb' % value
43+
44+
def main():
    """
    Print a size report for every collection on the connected server.

    Collection statistics are gathered for every database except ``local``,
    rendered as one table row per collection (document count, share of the
    total data size, data size, average object size, number of indexes and
    total index size), and followed by overall totals and a comparison of
    the total index size against physical memory.
    """
    summary_stats = {
        "count": 0,
        "size": 0,
        "indexSize": 0,
    }

    all_db_stats = {}
    for db in connection.database_names():
        # FIXME: Add an option to include oplog stats.
        if db == "local":
            continue

        database = connection[db]
        all_db_stats[database.name] = []
        for collection_name in database.collection_names():
            stats = get_collection_stats(database, database[collection_name])
            all_db_stats[database.name].append(stats)

            summary_stats["count"] += stats["count"]
            summary_stats["size"] += stats["size"]
            summary_stats["indexSize"] += stats.get("totalIndexSize", 0)

    x = PrettyTable(["Collection", "Count", "% Size", "DB Size", "Avg Obj Size", "Indexes", "Index Size"])
    x.set_field_align("Collection", "l")
    x.set_field_align("% Size", "r")
    x.set_field_align("Count", "r")
    x.set_field_align("DB Size", "r")
    x.set_field_align("Avg Obj Size", "r")
    x.set_field_align("Index Size", "r")
    x.set_padding_width(1)

    print("")

    total_size = float(summary_stats["size"])
    for db in all_db_stats:
        for stat in all_db_stats[db]:
            # When every collection is empty the total is 0; report 0% instead
            # of failing with ZeroDivisionError.
            if total_size:
                percent = (stat["size"] / total_size) * 100
            else:
                percent = 0.0
            x.add_row([stat["ns"], stat["count"], "%0.1f%%" % percent,
                       convert_bytes(stat["size"]),
                       convert_bytes(stat.get("avgObjSize", 0)),
                       stat.get("nindexes", 0),
                       convert_bytes(stat.get("totalIndexSize", 0))])

    print("")
    x.printt(sortby="% Size")
    print("Total Documents: %s" % summary_stats["count"])
    print("Total Data Size: %s" % convert_bytes(summary_stats["size"]))
    print("Total Index Size: %s" % convert_bytes(summary_stats["indexSize"]))

    # Take one memory snapshot so all three RAM figures describe the same
    # moment. Fields are read positionally: [0] total, [1] used, [3] percent
    # used (psutil's legacy phymem_usage() layout).
    memory = psutil.phymem_usage()
    ram_headroom = memory[0] - summary_stats["indexSize"]
    print("RAM Headroom: %s" % convert_bytes(ram_headroom))
    print("RAM Used: %s (%s%%)" % (convert_bytes(memory[1]), memory[3]))
    # 100.0 keeps the division floating-point even for an integer percentage.
    print("Available RAM Headroom: %s" % convert_bytes((100 - memory[3]) / 100.0 * ram_headroom))
101+
102+
if __name__ == "__main__":
103+
main()

‎mongodbtools/index_stats.py

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
This script prints the size of every index in each collection, followed by
5+
the five largest indexes and overall data, index and RAM headroom totals.
6+
"""
7+
8+
from prettytable import PrettyTable
9+
import psutil
10+
from pymongo import Connection
11+
from pymongo import ReadPreference
12+
13+
connection = Connection(read_preference=ReadPreference.SECONDARY)
14+
15+
def compute_signature(index):
    """
    Return a string identifying an index by namespace and key layout.

    The result starts with ``index["ns"]`` and appends ``<field>_<value>``
    for each entry of ``index["key"]``, in that mapping's iteration order.
    """
    namespace = index["ns"]
    key_parts = ("%s_%s" % pair for pair in index["key"].items())
    return namespace + "".join(key_parts)
20+
21+
def get_collection_stats(database, collection):
    """
    Print a progress line for ``collection`` and return its ``collstats``.

    The statistics come from running the ``collstats`` command against
    ``database`` with the collection's short name.
    """
    print("Checking DB: %s" % (collection.full_name,))
    result = database.command("collstats", collection.name)
    return result
24+
25+
# From http://www.5dollarwhitebox.org/drupal/node/84
26+
def convert_bytes(bytes):
    """
    Format a byte count as a short human-readable string.

    The value is scaled by powers of 1024 and rendered with two decimals and
    a one-letter suffix: ``T``, ``G``, ``M``, ``K``, or ``b`` for plain bytes.
    The unit is chosen from the magnitude of the value, so negative amounts
    (for example a negative RAM headroom when the indexes are larger than
    physical memory) are scaled too instead of always being shown in bytes.

    bytes -- the size to format; anything accepted by ``float()``.

    Returns the formatted string, e.g. ``'1.50K'`` or ``'-2.00G'``.
    """
    # The parameter name shadows the builtin; it is kept unchanged so that
    # callers passing it by keyword keep working.
    value = float(bytes)
    magnitude = abs(value)
    units = (
        (1099511627776, 'T'),
        (1073741824, 'G'),
        (1048576, 'M'),
        (1024, 'K'),
    )
    # Largest unit first: the first threshold the magnitude reaches wins.
    for factor, suffix in units:
        if magnitude >= factor:
            return '%.2f%s' % (value / factor, suffix)
    return '%.2fb' % value
43+
44+
def main():
    """
    Print per-index size statistics for the connected server.

    Collection statistics are gathered for every database except ``local``.
    The report consists of an "Index Overview" table with one row per index
    (collection, index name, share of the total index size, index size), a
    "Top 5 Largest Indexes" table, overall totals, and a comparison of the
    total index size against physical memory.
    """
    summary_stats = {
        "count": 0,
        "size": 0,
        "indexSize": 0,
    }

    all_db_stats = {}
    for db in connection.database_names():
        # FIXME: Add an option to include oplog stats.
        if db == "local":
            continue

        database = connection[db]
        all_db_stats[database.name] = []
        for collection_name in database.collection_names():
            stats = get_collection_stats(database, database[collection_name])
            all_db_stats[database.name].append(stats)

            summary_stats["count"] += stats["count"]
            summary_stats["size"] += stats["size"]
            summary_stats["indexSize"] += stats.get("totalIndexSize", 0)

    x = PrettyTable(["Collection", "Index","% Size", "Index Size"])
    x.set_field_align("Collection", "l")
    x.set_field_align("Index", "l")
    x.set_field_align("% Size", "r")
    x.set_field_align("Index Size", "r")
    x.set_padding_width(1)

    print("")

    total_index_size = float(summary_stats["indexSize"])
    # (index_size, row) pairs. A list is used rather than a dict keyed by
    # size, so that indexes with identical sizes do not overwrite each other
    # and drop out of the "top 5" table.
    sized_rows = []
    for db in all_db_stats:
        for stat in all_db_stats[db]:
            index_sizes = stat.get("indexSizes", {})
            for index in index_sizes:
                index_size = index_sizes.get(index, 0)
                # Avoid ZeroDivisionError when the total index size is 0.
                if total_index_size:
                    percent = (index_size / total_index_size) * 100
                else:
                    percent = 0.0
                row = [stat["ns"], index,
                       "%0.1f%%" % percent,
                       convert_bytes(index_size)]
                sized_rows.append((index_size, row))
                x.add_row(row)

    print("Index Overview")
    x.printt(sortby="Collection")

    print("")
    print("Top 5 Largest Indexes")
    x = PrettyTable(["Collection", "Index","% Size", "Index Size"])
    x.set_field_align("Collection", "l")
    x.set_field_align("Index", "l")
    x.set_field_align("% Size", "r")
    x.set_field_align("Index Size", "r")
    x.set_padding_width(1)

    # Sort on the size only; rows of equal size keep their discovery order.
    sized_rows.sort(key=lambda pair: pair[0], reverse=True)
    for _, row in sized_rows[:5]:
        x.add_row(row)
    x.printt()
    print("")

    print("Total Documents: %s" % summary_stats["count"])
    print("Total Data Size: %s" % convert_bytes(summary_stats["size"]))
    print("Total Index Size: %s" % convert_bytes(summary_stats["indexSize"]))

    # Take one memory snapshot so both RAM figures describe the same moment.
    # Fields are read positionally: [0] total, [3] percent used (psutil's
    # legacy phymem_usage() layout).
    memory = psutil.phymem_usage()
    ram_headroom = memory[0] - summary_stats["indexSize"]
    print("RAM Headroom: %s" % convert_bytes(ram_headroom))
    # 100.0 keeps the division floating-point even for an integer percentage.
    print("Available RAM Headroom: %s" % convert_bytes((100 - memory[3]) / 100.0 * ram_headroom))
118+
119+
if __name__ == "__main__":
120+
main()

‎mongodbtools/query/__init__.py

Whitespace-only changes.

‎query/helpers.py ‎mongodbtools/query/helpers.py

+22-16
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,23 @@ def bson_iter(bson_file):
2525
raise InvalidBSON("bad eoo")
2626
yield bson._bson_to_dict(size_str + obj, dict, True)[0]
2727

28+
def _deep_get(obj, field):
29+
parts = field.split(".")
30+
if len(parts) == 1:
31+
return obj.get(field)
32+
33+
last_value = {}
34+
for part in parts[0:-1]:
35+
last_value = obj.get(part)
36+
37+
if not last_value:
38+
return False
39+
40+
if isinstance(last_value, dict):
41+
return last_value.get(parts[-1])
42+
else:
43+
return getattr(last_value, parts[-1])
44+
2845
def groupby(iterator, field):
2946
"""
3047
Returns a dictionary with the keys being the values of the field to group by
@@ -34,8 +51,10 @@ def groupby(iterator, field):
3451
for example.
3552
"""
3653
groups = {}
37-
for k, g in itertools.groupby(iterator, lambda x: x.get(field)):
38-
groups.setdefault(k, []).append(g)
54+
for k, g in itertools.groupby(iterator, lambda x: _deep_get(x, field)):
55+
items = groups.setdefault(k, [])
56+
for item in g:
57+
items.append(item)
3958
return groups
4059

4160
def filter(iterator, field, value):
@@ -45,18 +64,5 @@ def filter(iterator, field, value):
4564
The field can be a nested field like a.b.c and it will descend into the
4665
embedded documents.
4766
"""
48-
def deep_get(obj, field, value):
49-
parts = field.split(".")
50-
if len(parts) == 1:
51-
return obj.get(field) == value
52-
53-
last_value = {}
54-
for part in parts[0:-1]:
55-
last_value = obj.get(part)
56-
57-
if not last_value:
58-
return False
59-
60-
return last_value.get(parts[-1]) == value
6167

62-
return itertools.ifilter(lambda x: deep_get(x, field, value), iterator)
68+
return itertools.ifilter(lambda x: _deep_get(x, field) == value, iterator)

‎mongodbtools/query/parser.py

+146
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# simpleSQL.py
2+
#
3+
# simple demo of using the parsing library to do simple-minded SQL parsing
4+
# could be extended to include where clauses etc.
5+
#
6+
# Copyright (c) 2003, Paul McGuire
7+
#
8+
# Originally from http://pyparsing.wikispaces.com/file/view/simpleSQL.py
9+
10+
from pyparsing import Literal, CaselessLiteral, Word, upcaseTokens, delimitedList, Optional, \
11+
Combine, Group, alphas, nums, alphanums, ParseException, Forward, oneOf, quotedString, \
12+
ZeroOrMore, restOfLine, Keyword
13+
14+
def test( str ):
    """
    Parse ``str`` with the ``simpleSQL`` grammar and print the outcome.

    On success the full token list and the ``columns``, ``tables`` and
    ``where`` results are printed. On a ParseException a caret is printed
    under the failing position, followed by the error message and the
    exception itself. A blank line is printed at the end in both cases.
    """
    print("%s ->" % (str,))
    try:
        tokens = simpleSQL.parseString( str )
    except ParseException as err:
        # err.loc is the offset of the failure; pad so the caret lines up.
        print(" "*err.loc + "^\n" + err.msg)
        print(err)
    else:
        print("tokens =  %s" % (tokens,))
        print("tokens.columns = %s" % (tokens.columns,))
        print("tokens.tables = %s" % (tokens.tables,))
        print("tokens.where = %s" % (tokens.where,))
    print("")
26+
27+
28+
# define SQL tokens
# selectStmt is declared as a Forward placeholder here and given its
# definition with `<<` further down.
selectStmt = Forward()
# Keywords are matched case-insensitively.
selectToken = Keyword("select", caseless=True)
fromToken = Keyword("from", caseless=True)
whereToken = Keyword("where", caseless=True)

# An identifier starts with a letter or "_" and continues with letters,
# digits, "_", "$" or ".".
ident = Word( alphas+"_", alphanums + "_$." ).setName("identifier")
# Column and table names are dot-separated identifiers combined into a single
# token; the *List forms are comma-separated groups of them.
columnName = delimitedList( ident, ".", combine=True )
columnNameList = Group( delimitedList( columnName ) )
tableName = delimitedList( ident, ".", combine=True )
tableNameList = Group( delimitedList( tableName ) )

# whereExpression is recursive (it appears inside whereCondition), so it is
# declared as a Forward and defined after whereCondition.
whereExpression = Forward()
and_ = Keyword("and", caseless=True)
or_ = Keyword("or", caseless=True)
in_ = Keyword("in", caseless=True)

E = CaselessLiteral("E")
# Comparison operators, symbolic or spelled out, matched case-insensitively.
binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
arithSign = Word("+-",exact=1)
# Real number: optional sign, then either digits with a decimal point or a
# leading-dot fraction, then an optional exponent.
realNum = Combine( Optional(arithSign) + ( Word( nums ) + "." + Optional( Word(nums) ) |
                                           ( "." + Word(nums) ) ) +
                   Optional( E + Optional(arithSign) + Word(nums) ) )
# Integer: optional sign, digits, and an optional exponent whose own sign may
# only be "+".
intNum = Combine( Optional(arithSign) + Word( nums ) +
                  Optional( E + Optional("+") + Word(nums) ) )

# Right-hand side of a condition; alternatives are tried in this order.
columnRval = realNum | intNum | quotedString | columnName # need to add support for alg expressions
# A condition is a binary comparison, an "in ( ... )" list test, or a
# parenthesised where expression.
whereCondition = Group(
    ( columnName + binop + columnRval ) |
    ( columnName + in_ + "(" + delimitedList( columnRval ) + ")" ) |
    ( "(" + whereExpression + ")" )
    )
# One condition followed by any number of and/or-joined where expressions.
whereExpression << whereCondition + ZeroOrMore( ( and_ | or_ ) + whereExpression )

# define the grammar
# select <"*" | columns> from <tables> [where <expression>]; the pieces are
# exposed under the result names "columns", "tables" and "where". The where
# clause is optional and "" is passed as Optional's second argument.
selectStmt << ( selectToken +
                ( '*' | columnNameList ).setResultsName( "columns" ) +
                fromToken +
                tableNameList.setResultsName( "tables" ) +
                Optional( Group( whereToken + whereExpression ), "" ).setResultsName("where") )

# Public entry point of the grammar.
simpleSQL = selectStmt

# define Oracle comment format, and ignore them
# A comment is "--" followed by the rest of the line.
oracleSqlComment = "--" + restOfLine
simpleSQL.ignore( oracleSqlComment )
74+
75+
76+
"""
77+
test( "SELECT * from XYZZY, ABC" )
78+
test( "select * from SYS.XYZZY" )
79+
test( "Select A from Sys.dual" )
80+
test( "Select A,B,C from Sys.dual" )
81+
test( "Select A, B, C from Sys.dual" )
82+
test( "Select A, B, C from Sys.dual, Table2 " )
83+
test( "Xelect A, B, C from Sys.dual" )
84+
test( "Select A, B, C frox Sys.dual" )
85+
test( "Select" )
86+
test( "Select &&& frox Sys.dual" )
87+
test( "Select A from Sys.dual where a in ('RED','GREEN','BLUE')" )
88+
test( "Select A from Sys.dual where a in ('RED','GREEN','BLUE') and b in (10,20,30)" )
89+
test( "Select A,b from table1,table2 where table1.id eq table2.id -- test out comparison operators" )
90+
test( "Select * from User, RemoteAccount where user._id = user.user_id)" )
91+
92+
93+
Test output:
94+
>pythonw -u simpleSQL.py
95+
SELECT * from XYZZY, ABC ->
96+
tokens = ['select', '*', 'from', ['XYZZY', 'ABC']]
97+
tokens.columns = *
98+
tokens.tables = ['XYZZY', 'ABC']
99+
100+
select * from SYS.XYZZY ->
101+
tokens = ['select', '*', 'from', ['SYS.XYZZY']]
102+
tokens.columns = *
103+
tokens.tables = ['SYS.XYZZY']
104+
105+
Select A from Sys.dual ->
106+
tokens = ['select', ['A'], 'from', ['SYS.DUAL']]
107+
tokens.columns = ['A']
108+
tokens.tables = ['SYS.DUAL']
109+
110+
Select A,B,C from Sys.dual ->
111+
tokens = ['select', ['A', 'B', 'C'], 'from', ['SYS.DUAL']]
112+
tokens.columns = ['A', 'B', 'C']
113+
tokens.tables = ['SYS.DUAL']
114+
115+
Select A, B, C from Sys.dual ->
116+
tokens = ['select', ['A', 'B', 'C'], 'from', ['SYS.DUAL']]
117+
tokens.columns = ['A', 'B', 'C']
118+
tokens.tables = ['SYS.DUAL']
119+
120+
Select A, B, C from Sys.dual, Table2 ->
121+
tokens = ['select', ['A', 'B', 'C'], 'from', ['SYS.DUAL', 'TABLE2']]
122+
tokens.columns = ['A', 'B', 'C']
123+
tokens.tables = ['SYS.DUAL', 'TABLE2']
124+
125+
Xelect A, B, C from Sys.dual ->
126+
^
127+
Expected 'select'
128+
Expected 'select' (0), (1,1)
129+
130+
Select A, B, C frox Sys.dual ->
131+
^
132+
Expected 'from'
133+
Expected 'from' (15), (1,16)
134+
135+
Select ->
136+
^
137+
Expected '*'
138+
Expected '*' (6), (1,7)
139+
140+
Select &&& frox Sys.dual ->
141+
^
142+
Expected '*'
143+
Expected '*' (7), (1,8)
144+
145+
>Exit code: 0
146+
"""

‎mongodbtools/redundant_indexes.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
This is a simple script to print out potentially redundant indexes in a MongoDB instance.
5+
For example, if an index is defined on {field1:1,field2:1} and there is another index
6+
with just fields {field1:1}, the latter index is not needed since the first index already
7+
indexes the necessary fields.
8+
"""
9+
from pymongo import Connection
10+
11+
def main():
    """
    Report potentially redundant indexes in every database of the server.

    Each index is reduced to a string signature (namespace plus its
    ``<field>_<value>`` pairs). An index is reported when its signature is a
    proper string prefix of another index's signature in the same database.
    """
    connection = Connection()

    def compute_signature(index):
        # Namespace first, then one "<field>_<value>" fragment per key entry.
        pieces = [index["ns"]]
        for field, raw_value in index["key"].items():
            try:
                value = int(raw_value)
            except ValueError:
                # Value is not convertible to an integer: keep it as found.
                value = raw_value
            pieces.append("%s_%s" % (field, value))
        return "".join(pieces)

    def report_redundant_indexes(current_db):
        print("Checking DB: %s" % current_db.name)
        index_map = dict(
            (compute_signature(index), index)
            for index in current_db.system.indexes.find())

        # Compare every signature against every other one.
        for signature, index in index_map.items():
            for other_sig, other_index in index_map.items():
                if signature != other_sig and other_sig.startswith(signature):
                    print("Index %s[%s] may be redundant with %s[%s]" % (
                        index["ns"],
                        index["name"],
                        other_index["ns"],
                        other_index["name"]))

    for db_name in connection.database_names():
        report_redundant_indexes(connection[db_name])
44+
45+
if __name__ == "__main__":
46+
main()

‎redundant-indexes.py

-39
This file was deleted.

‎setup.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from setuptools import setup, find_packages
2+
3+
# Release version passed to setup().
version = '0.1'

# Every package found under the project root except the excluded names.
packages = find_packages(exclude=['ez_setup', 'examples', 'tests'])
print(packages)

# Console commands installed with the package; each maps to a module's main().
console_entry_points = """\
[console_scripts]
collection-stats=mongodbtools.collection_stats:main
index-stats=mongodbtools.index_stats:main
redundant-indexes=mongodbtools.redundant_indexes:main
"""

# Runtime dependencies.
requirements = [
    'pymongo>=2.1',
    'PrettyTable',
    'psutil==0.3.0',
    'mongoengine==0.5.0',
]

setup(
    name='mongodbtools',
    version=version,
    description='Python tools for working with MongoDB',
    author='Jason Wilder',
    author_email='code@jasonwilder.com',
    maintainer='Jason Wilder',
    license='MIT',
    url='http://github.com/jwilder/mongodb-tools',
    packages=packages,
    entry_points=console_entry_points,
    install_requires=requirements,
)

0 commit comments

Comments
 (0)
Please sign in to comment.