Skip to content

Commit

Permalink
Merge pull request blaze#1164 from cpcloud/eight-point-one
Browse files Browse the repository at this point in the history
0.8.1 Release
  • Loading branch information
cpcloud committed Jul 8, 2015
2 parents a765031 + 43cf825 commit f409f8b
Show file tree
Hide file tree
Showing 9 changed files with 225 additions and 69 deletions.
87 changes: 62 additions & 25 deletions blaze/expr/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
from ..compatibility import zip_longest


# Public API of this module (includes the lowercase ``isin`` constructor).
__all__ = ['Sort', 'Distinct', 'Head', 'Merge', 'IsIn', 'isin', 'distinct',
           'merge', 'head', 'sort', 'Join', 'join', 'transform', 'Concat',
           'concat']


class Sort(Expr):
Expand Down Expand Up @@ -60,16 +61,18 @@ def __str__(self):


def sort(child, key=None, ascending=True):
""" Sort collection
""" Sort a collection
Parameters
----------
key: string, list of strings, Expr
Defines by what you want to sort. Either:
A single column string, ``t.sort('amount')``
A list of column strings, ``t.sort(['name', 'amount'])``
A Table Expression, ``t.sort(-t.amount)``
ascending: bool
key : str, list of str, or Expr
Defines by what you want to sort.
* A single column string: ``t.sort('amount')``
* A list of column strings: ``t.sort(['name', 'amount'])``
* An expression: ``t.sort(-t.amount)``
ascending : bool, optional
Determines order of the sort
"""
if not isrecord(child.dshape.measure):
Expand All @@ -81,8 +84,7 @@ def sort(child, key=None, ascending=True):

class Distinct(Expr):

"""
Removes duplicate rows from the table, so every row is distinct
""" Remove duplicate elements from an expression
Examples
--------
Expand Down Expand Up @@ -120,9 +122,12 @@ def distinct(expr):
return Distinct(expr)


distinct.__doc__ = Distinct.__doc__


class Head(Expr):

""" First ``n`` elements of collection
""" First `n` elements of collection
Examples
--------
Expand Down Expand Up @@ -258,6 +263,9 @@ def _leaves(self):
return list(unique(tconcat(i._leaves() for i in self.children)))


merge.__doc__ = Merge.__doc__


def unpack(l):
""" Unpack items from collections of nelements 1
Expand All @@ -278,11 +286,11 @@ class Join(Expr):
Parameters
----------
lhs : Expr
rhs : Expr
lhs, rhs : Expr
Expressions to join
on_left : string
on_right : string
suffixes: pair
suffixes: pair of strings
Examples
--------
Expand All @@ -291,9 +299,11 @@ class Join(Expr):
>>> amounts = symbol('amounts', 'var * {amount: int, id: int}')
Join tables based on shared column name
>>> joined = join(names, amounts, 'id')
Join based on different column names
>>> amounts = symbol('amounts', 'var * {amount: int, acctNumber: int}')
>>> joined = join(names, amounts, 'id', 'acctNumber')
Expand Down Expand Up @@ -338,6 +348,7 @@ def schema(self):
dshape("{name: string, amount: int32, id: ?int32}")
Overlapping but non-joined fields append _left, _right
>>> a = symbol('a', 'var * {x: int, y: int}')
>>> b = symbol('b', 'var * {x: int, y: int}')
>>> join(a, b, 'x').fields
Expand Down Expand Up @@ -446,31 +457,33 @@ class Concat(Expr):
Parameters
----------
lhs : Expr
rhs : Expr
lhs, rhs : Expr
Collections to concatenate
axis : int, optional
The axis to concatenate on.
Examples
--------
>>> from blaze import symbol
Vertically stack tables:
>>> names = symbol('names', '5 * {name: string, id: int32}')
>>> more_names = symbol('more_names', '7 * {name: string, id: int32}')
Vertically stack these tables.
>>> stacked = concat(names, more_names)
>>> stacked.dshape
dshape("12 * {name: string, id: int32}")
Vertically stack matrices:
>>> mat_a = symbol('a', '3 * 5 * int32')
>>> mat_b = symbol('b', '3 * 5 * int32')
Vertically stack these matricies.
>>> vstacked = concat(mat_a, mat_b, axis=0)
>>> vstacked.dshape
dshape("6 * 5 * int32")
Horizontally stack these matricies.
Horizontally stack matrices:
>>> hstacked = concat(mat_a, mat_b, axis=1)
>>> hstacked.dshape
dshape("3 * 10 * int32")
Expand Down Expand Up @@ -534,22 +547,46 @@ def concat(lhs, rhs, axis=0):


class IsIn(ElemWise):
    """Check if an expression contains values from a set.

    Return a boolean expression indicating whether another expression
    contains values that are members of a collection.

    Parameters
    ----------
    expr : Expr
        Expression whose elements to check for membership in `keys`
    keys : Sequence
        Elements to test against. Blaze stores this as a ``frozenset``.

    Examples
    --------

    Check if a vector contains any of 1, 2 or 3:

    >>> from blaze import symbol
    >>> t = symbol('t', '10 * int64')
    >>> expr = t.isin([1, 2, 3])
    >>> expr.dshape
    dshape("10 * bool")
    """
    __slots__ = '_hash', '_child', '_keys'

    @property
    def schema(self):
        # The measure of a membership test is always boolean.
        return datashape.bool_

    def __str__(self):
        # Rendered as ``<child>.<classname>(<keys>)``; the method name is
        # taken from the lowercased class name rather than hard-coded.
        return '%s.%s(%s)' % (self._child, type(self).__name__.lower(),
                              self._keys)


def isin(expr, keys):
    # No docstring here on purpose: ``isin.__doc__`` is assigned from
    # ``IsIn.__doc__`` immediately after this definition.
    if isinstance(keys, Expr):
        # Keys must be concrete values, not another expression.
        raise TypeError('keys argument cannot be an expression, '
                        'it must be an iterable object such as a list, '
                        'tuple or set')
    # Store the keys as a frozenset (deduplicated and hashable).
    return IsIn(expr, frozenset(keys))


isin.__doc__ = IsIn.__doc__
Expand Down
5 changes: 3 additions & 2 deletions blaze/expr/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

# Public API of this module. Each name is listed exactly once.
__all__ = ['Expr', 'ElemWise', 'Field', 'Symbol', 'discover', 'Projection',
           'projection', 'Selection', 'selection', 'Label', 'label', 'Map',
           'ReLabel', 'relabel', 'Apply', 'apply', 'Slice', 'shape', 'ndim',
           'symbol', 'Coerce', 'coerce']


_attr_cache = dict()
Expand Down Expand Up @@ -688,6 +688,7 @@ class Apply(Expr):
See Also
--------
blaze.expr.expressions.Map
"""
__slots__ = '_hash', '_child', 'func', '_dshape', '_splittable'
Expand Down
32 changes: 17 additions & 15 deletions blaze/expr/split_apply_combine.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
from __future__ import absolute_import, division, print_function

import re

from .core import common_subexpression
from .expressions import Expr, symbol
from .expressions import Expr
from .reductions import Reduction, Summary, summary
from ..dispatch import dispatch
from .expressions import dshape_method_list

from datashape import dshape, Record, Option, Unit, var

# Public API of this module: the ``by`` constructor, the ``By`` expression
# class, and the ``count_values`` helper.
__all__ = ['by', 'By', 'count_values']


def _names_and_types(expr):
schema = expr.dshape.measure
if isinstance(schema, Option):
Expand All @@ -25,6 +30,7 @@ class By(Expr):
Examples
--------
>>> from blaze import symbol
>>> t = symbol('t', 'var * {name: string, amount: int, id: int}')
>>> e = by(t['name'], total=t['amount'].sum())
Expand Down Expand Up @@ -59,26 +65,25 @@ def dshape(self):
return var * self.schema

def __str__(self):
    """Render the expression as ``<classname>(<grouper>, <reductions>)``.

    The class name is lowercased for display.  When the string form of
    ``self.apply`` is wrapped as ``summary(...)``, the wrapper is stripped
    so only its contents appear after the grouper.
    """
    # Anchored at both ends so only a wrapper spanning the whole string is
    # removed; any other string is passed through unchanged.
    reductions = re.sub(r'^summary\((.*)\)$', r'\1', str(self.apply))
    return '%s(%s, %s)' % (type(self).__name__.lower(),
                           self.grouper,
                           reductions)


@dispatch(Expr, Reduction)
def by(grouper, s):
    # A bare, unnamed reduction is rejected; the message shows the caller
    # the keyword-argument form to use instead.
    raise ValueError("This syntax has been removed.\n"
                     "Please name reductions with keyword arguments.\n"
                     "Before: by(t.name, t.amount.sum())\n"
                     "After: by(t.name, total=t.amount.sum())")


@dispatch(Expr, Summary)
def by(grouper, s):
    # An already-built Summary is passed straight through to By.
    return By(grouper, s)


@dispatch(Expr)
def by(grouper, **kwargs):
    # Keyword arguments name the reductions; they are bundled into a
    # Summary via ``summary(**kwargs)`` before constructing the By node.
    return By(grouper, summary(**kwargs))
Expand All @@ -97,9 +102,6 @@ def count_values(expr, sort=True):
return result


from datashape.predicates import iscollection
from .expressions import dshape_method_list

# Append one (predicate, functions) entry: the predicate is true when the
# datashape has exactly one dimension, and the function set holds
# ``count_values``.
# NOTE(review): presumably this list is consulted elsewhere to expose the
# functions as methods on matching expressions -- confirm against the users
# of ``dshape_method_list``.
dshape_method_list.extend([
(lambda ds: len(ds.shape) == 1, set([count_values])),
])
4 changes: 2 additions & 2 deletions blaze/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def to_tree(expr, names=None):
See Also
--------
blaze.server.server.from_tree
from_tree
"""
if names and expr in names:
return names[expr]
Expand Down Expand Up @@ -292,7 +292,7 @@ def from_tree(expr, namespace=None):
See Also
--------
blaze.server.server.to_tree
to_tree
"""
if isinstance(expr, dict):
op, args = expr['op'], expr['args']
Expand Down
Loading

0 comments on commit f409f8b

Please sign in to comment.