@@ -57,13 +57,27 @@ class XYRef:
57
57
"""
58
58
Holder class that maintains a pointer/reference to X and y. The goal of this is to provide
59
59
a holder to the object references of Ray. This is used for passing outputs from a transform/fit
60
- to the next stage of the pipeline. Since the references can be potentially in flight (or being
60
+ to the next stage of the pipeline. Since the object references can be potentially in flight (or being
61
61
computed), these holders are essential to the pipeline constructs.
62
62
63
63
It also holds the state of the node itself, with the previous state of the node before a transform
64
64
operation is applied being held along with the next state. It also holds the previous
65
65
XYRef instances. In essence, this holder class is a bunch of pointers, but it is enough to reconstruct
66
66
the entire pipeline through appropriate traversals.
67
+
68
+ NOTE: Default constructor takes pointer to X and y. The more advanced constructs are pointer
69
+ holders for the pipeline during its execution and are not meant to be used outside by developers.
70
+
71
+ Examples
72
+ --------
73
+ .. code-block:: python
74
+
75
+ x = np.array([1.0, 2.0, 4.0, 5.0])
76
+ y = np.array(['odd', 'even', 'even', 'odd'])
77
+ x_ref = ray.put(x)
78
+ y_ref = ray.put(y)
79
+
80
+ xy_ref = XYRef(x_ref, y_ref)
67
81
"""
68
82
69
83
def __init__ (self , Xref : ray .ObjectRef , yref : ray .ObjectRef , prev_node_state_ref : ray .ObjectRef = None , curr_node_state_ref : ray .ObjectRef = None , prev_Xyrefs = None ):
@@ -124,16 +138,47 @@ def get_prev_xyrefs(self):
124
138
125
139
126
140
class NodeInputType (Enum ):
141
+ """
142
+ Defines the node input types, currently, it supports an OR and AND node. An OR node is backed by an
143
+ Estimator and an AND node is backed by an arbitrary lambda defined by an AndFunc. The key difference
144
+ is that for an OR node, the parallelism is defined at a single XYRef object, whereas for an AND node,
145
+ the parallelism is defined on a collection of objects coming "into" the AND node.
146
+
147
+ For details on parallelism and pipeline semantics, the reader is directed to the pipeline semantics
148
+ introduction of the User guide.
149
+ """
127
150
OR = 0 ,
128
151
AND = 1
129
152
130
153
131
154
class NodeFiringType (Enum ):
155
+ """
156
+ Defines the "firing" semantics of a node, there are two types of firing semantics, ANY and ALL. ANY
157
+ firing semantics means that upon the availability of a single object, the node will start executing
158
+ its work. Whereas, on ALL semantics, the node has to wait for ALL the objects ot be materialized
159
+ before the computation can begin, i.e. it is blocking.
160
+
161
+ For details on firing and pipeline semantics, the reader is directed to the pipeline semantics
162
+ introduction of the User guide.
163
+ """
132
164
ANY = 0 ,
133
165
ALL = 1
134
166
135
167
136
168
class NodeStateType (Enum ):
169
+ """
170
+ Defines the state type of a node, there are 4 types of state, which are STATELESS, IMMUTABLE, MUTABLE_SEQUENTIAL
171
+ and MUTABLE_AGGREGATE.
172
+
173
+ A STATELESS node is one that keeps no state and can be called any number of times without any change to the "model"
174
+ or "function" state.
175
+
176
+ A IMMUTABLE node is one that once a model has "fitted" cannot change, i.e. there is no partial fit available.
177
+
178
+ A MUTABLE_SEQUENTIAL node is one that can be updated with a sequence of input object(s) or a stream.
179
+
180
+ A MUTABLE_AGGREGATE node is one that can be updated in batches.
181
+ """
137
182
STATELESS = 0 ,
138
183
IMMUTABLE = 1 ,
139
184
MUTABLE_SEQUENTIAL = 2 ,
@@ -156,16 +201,36 @@ def __init__(self, node_name, node_input_type: NodeInputType, node_firing_type:
156
201
def __str__ (self ):
157
202
return self .__node_name__
158
203
159
- def get_node_name (self ):
204
+ def get_node_name (self ) -> str :
205
+ """
206
+ Returns the node name
207
+
208
+ :return: The name of this node
209
+ """
160
210
return self .__node_name__
161
211
162
- def get_node_input_type (self ):
212
+ def get_node_input_type (self ) -> NodeInputType :
213
+ """
214
+ Return the node input type
215
+
216
+ :return: The node input type
217
+ """
163
218
return self .__node_input_type__
164
219
165
- def get_node_firing_type (self ):
220
+ def get_node_firing_type (self ) -> NodeFiringType :
221
+ """
222
+ Return the node firing type
223
+
224
+ :return: The node firing type
225
+ """
166
226
return self .__node_firing_type__
167
227
168
- def get_node_state_type (self ):
228
+ def get_node_state_type (self ) -> NodeStateType :
229
+ """
230
+ Return the node state type
231
+
232
+ :return: The node state type
233
+ """
169
234
return self .__node_state_type__
170
235
171
236
@abstractmethod
@@ -196,8 +261,11 @@ def __eq__(self, other):
196
261
197
262
class EstimatorNode (Node ):
198
263
"""
199
- Or node, which is the basic node that would be the equivalent of any SKlearn pipeline
264
+ Basic estimator node, which is the basic node that would be the equivalent of any SKlearn pipeline
200
265
stage. This node is initialized with an estimator that needs to extend sklearn.BaseEstimator.
266
+
267
+ This estimator node is typically an OR node, with ANY firing semantics, and IMMUTABLE state. For
268
+ partial fit, we will have to define a different node type to keep semantics very clear.
201
269
"""
202
270
203
271
def __init__ (self , node_name : str , estimator : BaseEstimator ):
0 commit comments