Skip to content

Commit fef91d0

Browse files
Adding a few more docs
1 parent 47137c1 commit fef91d0

File tree

1 file changed

+74
-6
lines changed

1 file changed

+74
-6
lines changed

codeflare/pipelines/Datamodel.py

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,27 @@ class XYRef:
5757
"""
5858
Holder class that maintains a pointer/reference to X and y. The goal of this is to provide
5959
a holder to the object references of Ray. This is used for passing outputs from a transform/fit
60-
to the next stage of the pipeline. Since the references can be potentially in flight (or being
60+
to the next stage of the pipeline. Since the object references can be potentially in flight (or being
6161
computed), these holders are essential to the pipeline constructs.
6262
6363
It also holds the state of the node itself, with the previous state of the node before a transform
6464
operation is applied being held along with the next state. It also holds the previous
6565
XYRef instances. In essence, this holder class is a bunch of pointers, but it is enough to reconstruct
6666
the entire pipeline through appropriate traversals.
67+
68+
NOTE: Default constructor takes pointer to X and y. The more advanced constructs are pointer
69+
holders for the pipeline during its execution and are not meant to be used outside by developers.
70+
71+
Examples
72+
--------
73+
.. code-block:: python
74+
75+
x = np.array([1.0, 2.0, 4.0, 5.0])
76+
y = np.array(['odd', 'even', 'even', 'odd'])
77+
x_ref = ray.put(x)
78+
y_ref = ray.put(y)
79+
80+
xy_ref = XYRef(x_ref, y_ref)
6781
"""
6882

6983
def __init__(self, Xref: ray.ObjectRef, yref: ray.ObjectRef, prev_node_state_ref: ray.ObjectRef=None, curr_node_state_ref: ray.ObjectRef=None, prev_Xyrefs = None):
@@ -124,16 +138,47 @@ def get_prev_xyrefs(self):
124138

125139

126140
class NodeInputType(Enum):
141+
"""
142+
Defines the node input types; currently, it supports an OR and an AND node. An OR node is backed by an
143+
Estimator and an AND node is backed by an arbitrary lambda defined by an AndFunc. The key difference
144+
is that for an OR node, the parallelism is defined at a single XYRef object, whereas for an AND node,
145+
the parallelism is defined on a collection of objects coming "into" the AND node.
146+
147+
For details on parallelism and pipeline semantics, the reader is directed to the pipeline semantics
148+
introduction of the User guide.
149+
"""
127150
OR = 0,
128151
AND = 1
129152

130153

131154
class NodeFiringType(Enum):
155+
"""
156+
Defines the "firing" semantics of a node. There are two types of firing semantics, ANY and ALL. ANY
157+
firing semantics means that upon the availability of a single object, the node will start executing
158+
its work. Whereas, with ALL semantics, the node has to wait for ALL the objects to be materialized
159+
before the computation can begin, i.e. it is blocking.
160+
161+
For details on firing and pipeline semantics, the reader is directed to the pipeline semantics
162+
introduction of the User guide.
163+
"""
132164
ANY = 0,
133165
ALL = 1
134166

135167

136168
class NodeStateType(Enum):
169+
"""
170+
Defines the state type of a node. There are 4 types of state: STATELESS, IMMUTABLE, MUTABLE_SEQUENTIAL
171+
and MUTABLE_AGGREGATE.
172+
173+
A STATELESS node is one that keeps no state and can be called any number of times without any change to the "model"
174+
or "function" state.
175+
176+
An IMMUTABLE node is one that, once a model has been "fitted", cannot change, i.e. there is no partial fit available.
177+
178+
A MUTABLE_SEQUENTIAL node is one that can be updated with a sequence of input object(s) or a stream.
179+
180+
A MUTABLE_AGGREGATE node is one that can be updated in batches.
181+
"""
137182
STATELESS = 0,
138183
IMMUTABLE = 1,
139184
MUTABLE_SEQUENTIAL = 2,
@@ -156,16 +201,36 @@ def __init__(self, node_name, node_input_type: NodeInputType, node_firing_type:
156201
def __str__(self):
157202
return self.__node_name__
158203

159-
def get_node_name(self):
204+
def get_node_name(self) -> str:
205+
"""
206+
Return the node name
207+
208+
:return: The name of this node
209+
"""
160210
return self.__node_name__
161211

162-
def get_node_input_type(self):
212+
def get_node_input_type(self) -> NodeInputType:
213+
"""
214+
Return the node input type
215+
216+
:return: The node input type
217+
"""
163218
return self.__node_input_type__
164219

165-
def get_node_firing_type(self):
220+
def get_node_firing_type(self) -> NodeFiringType:
221+
"""
222+
Return the node firing type
223+
224+
:return: The node firing type
225+
"""
166226
return self.__node_firing_type__
167227

168-
def get_node_state_type(self):
228+
def get_node_state_type(self) -> NodeStateType:
229+
"""
230+
Return the node state type
231+
232+
:return: The node state type
233+
"""
169234
return self.__node_state_type__
170235

171236
@abstractmethod
@@ -196,8 +261,11 @@ def __eq__(self, other):
196261

197262
class EstimatorNode(Node):
198263
"""
199-
Or node, which is the basic node that would be the equivalent of any SKlearn pipeline
264+
Basic estimator node, which is the basic node that would be the equivalent of any SKlearn pipeline
200265
stage. This node is initialized with an estimator that needs to extend sklearn.BaseEstimator.
266+
267+
This estimator node is typically an OR node, with ANY firing semantics, and IMMUTABLE state. For
268+
partial fit, we will have to define a different node type to keep semantics very clear.
201269
"""
202270

203271
def __init__(self, node_name: str, estimator: BaseEstimator):

0 commit comments

Comments
 (0)