Skip to content

Commit 6c12297

Browse files
authored
[FSTORE-618] Expand current java docstrings to cover new hsfs functionalities (#932)
1 parent 482c207 commit 6c12297

12 files changed

+3387
-410
lines changed

java/pom.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,9 @@
398398
<source>1.8</source>
399399
<target>1.8</target>
400400
<sourcepath>${delombok.output}</sourcepath>
401+
<sourceFileExcludes>
402+
<exclude>**/MainClass.java</exclude>
403+
</sourceFileExcludes>
401404
</configuration>
402405

403406
<executions>

java/src/main/java/com/logicalclocks/hsfs/Feature.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,23 @@ public Feature(String name, String type, String onlineType, Boolean primary, Boo
133133
this.description = description;
134134
}
135135

136+
/**
137+
* Returns true if the feature has a complex type.
138+
*
139+
* <pre>
140+
* {@code // get feature store handle
141+
* FeatureStore fs = ...;
142+
* // get feature group handle
143+
* FeatureGroup fg = ...;
144+
* // get feature
145+
* Feature ft = fg.getFeature("feature_name");
146+
* // check if the feature has a complex type.
147+
* ft.isComplex();
148+
* }
149+
* </pre>
150+
*
151+
* @return true if the feature type starts with any of the complex feature types, false otherwise
152+
*/
136153
@JsonIgnore
137154
public boolean isComplex() {
138155
return Constants.COMPLEX_FEATURE_TYPES.stream().anyMatch(c -> type.toUpperCase().startsWith(c));
@@ -198,6 +215,7 @@ public Filter ge(Feature value) {
198215
* @deprecated
199216
* `in` method is deprecated. Use `isin` instead.
200217
*/
218+
@Deprecated
201219
public Filter in(Collection<?> collection) {
202220
return isin(collection);
203221
}

java/src/main/java/com/logicalclocks/hsfs/FeatureGroup.java

Lines changed: 601 additions & 169 deletions
Large diffs are not rendered by default.

java/src/main/java/com/logicalclocks/hsfs/FeatureStore.java

Lines changed: 532 additions & 80 deletions
Large diffs are not rendered by default.

java/src/main/java/com/logicalclocks/hsfs/FeatureView.java

Lines changed: 1401 additions & 42 deletions
Large diffs are not rendered by default.

java/src/main/java/com/logicalclocks/hsfs/StorageConnector.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,20 @@ public abstract class StorageConnector {
7777

7878
protected StorageConnectorApi storageConnectorApi = new StorageConnectorApi();
7979

80+
/**
81+
* Reads a query or a path into a dataframe using the storage connector.
82+
*
83+
* @param query By default, the storage connector will read the table configured together with the connector, if any.
84+
* It's possible to overwrite this by passing a SQL query here.
85+
* @param dataFormat When reading from object stores such as S3, HopsFS and ADLS, specify the file format to be read,
86+
* e.g. `csv`, `parquet`.
87+
* @param options Any additional key/value options to be passed to the connector.
88+
* @param path Path to be read from within the bucket of the storage connector. Not relevant for JDBC or database
89+
* based connectors such as Snowflake, JDBC or Redshift.
90+
* @return DataFrame.
91+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
92+
* @throws IOException Generic IO exception.
93+
*/
8094
public Object read(String query, String dataFormat, Map<String, String> options, String path)
8195
throws FeatureStoreException, IOException {
8296
return SparkEngine.getInstance().read(this, dataFormat, options, path);

java/src/main/java/com/logicalclocks/hsfs/StreamFeatureGroup.java

Lines changed: 763 additions & 69 deletions
Large diffs are not rendered by default.

java/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupEngine.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,10 @@ public class FeatureGroupEngine {
5858
* key.
5959
* @param writeOptions Additional write options as key-value pairs, defaults to empty Map.
6060
* @return Feature Group metadata object
61-
* @throws FeatureStoreException FeatureStoreException
62-
* @throws IOException IOException
63-
* @throws ParseException ParseException
61+
* @throws FeatureStoreException In case Client is not connected to Hopsworks, unable to identify date format and/or
62+
* no commit information was found for the feature group.
63+
* @throws IOException Generic IO exception.
64+
* @throws ParseException In case it's unable to parse HUDI and/or statistics commit date string to date type.
6465
*/
6566
public FeatureGroup save(FeatureGroup featureGroup, Dataset<Row> dataset, List<String> partitionKeys,
6667
String hudiPrecombineKey, Map<String, String> writeOptions)

java/src/main/java/com/logicalclocks/hsfs/engine/SparkEngine.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -257,8 +257,8 @@ public void registerHudiTemporaryTable(HudiFeatureGroupAlias hudiFeatureGroupAli
257257
* @param writeOptions Additional write options as key-value pairs, defaults to empty Map
258258
* @param saveMode org.apache.spark.sql.saveMode: Append, Overwrite, ErrorIfExists, Ignore
259259
* @return Spark dataframe
260-
* @throws FeatureStoreException FeatureStoreException
261-
* @throws IOException IOException
260+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
261+
* @throws IOException Generic IO exception.
262262
*/
263263
public Dataset<Row>[] write(TrainingDataset trainingDataset, Query query, Map<String, String> queryReadOptions,
264264
Map<String, String> writeOptions, SaveMode saveMode) throws FeatureStoreException, IOException {
@@ -561,8 +561,8 @@ public <S> StreamingQuery writeStreamDataframe(FeatureGroupBase featureGroupBase
561561
* @param featureGroupBase FeatureGroupBase Feature Group base metadata object
562562
* @param dataset Spark DataFrame or RDD.
563563
* @return Spark DataFrame.
564-
* @throws FeatureStoreException FeatureStoreException
565-
* @throws IOException IOException
564+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
565+
* @throws IOException Generic IO exception.
566566
*/
567567
public Dataset<Row> encodeComplexFeatures(FeatureGroupBase featureGroupBase, Dataset<Row> dataset)
568568
throws FeatureStoreException, IOException {
@@ -584,8 +584,8 @@ public Dataset<Row> encodeComplexFeatures(FeatureGroupBase featureGroupBase, Dat
584584
* @param featureGroupBase FeatureGroupBase Feature Group base metadata object
585585
* @param dataset Spark DataFrame or RDD.
586586
* @return Spark DataFrame.
587-
* @throws FeatureStoreException FeatureStoreException
588-
* @throws IOException IOException
587+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
588+
* @throws IOException Generic IO exception.
589589
*/
590590
private Dataset<Row> onlineFeatureGroupToAvro(FeatureGroupBase featureGroupBase, Dataset<Row> dataset)
591591
throws FeatureStoreException, IOException {
@@ -676,7 +676,7 @@ public String profile(Dataset<Row> df) {
676676
}
677677

678678
public void setupConnectorHadoopConf(StorageConnector storageConnector)
679-
throws FeatureStoreException, IOException {
679+
throws IOException {
680680
if (storageConnector == null) {
681681
return;
682682
}

java/src/main/java/com/logicalclocks/hsfs/engine/hudi/HudiEngine.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ private FeatureGroupCommit getLastCommitMetadata(SparkSession sparkSession, Stri
211211

212212
private Map<String, String> setupHudiWriteOpts(FeatureGroupBase featureGroup, HudiOperationType operation,
213213
Map<String, String> writeOptions)
214-
throws IOException, FeatureStoreException {
214+
throws FeatureStoreException {
215215
Map<String, String> hudiArgs = new HashMap<String, String>();
216216

217217
hudiArgs.put(HUDI_TABLE_STORAGE_TYPE, HUDI_COPY_ON_WRITE);

java/src/main/java/com/logicalclocks/hsfs/metadata/FeatureGroupBase.java

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,8 @@ public <T> T read() throws FeatureStoreException, IOException {
176176
*
177177
* @param name name of the tag
178178
* @param value value of the tag. The value of a tag can be any valid json - primitives, arrays or json objects.
179-
* @throws FeatureStoreException FeatureStoreException
180-
* @throws IOException IOException
179+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
180+
* @throws IOException Generic IO exception.
181181
*/
182182
public void addTag(String name, Object value) throws FeatureStoreException, IOException {
183183
featureGroupBaseEngine.addTag(this, name, value);
@@ -187,8 +187,8 @@ public void addTag(String name, Object value) throws FeatureStoreException, IOEx
187187
* Get all tags of the feature group.
188188
*
189189
* @return map of tag name and values. The value of a tag can be any valid json - primitives, arrays or json objects
190-
* @throws FeatureStoreException FeatureStoreException
191-
* @throws IOException IOException
190+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
191+
* @throws IOException Generic IO exception.
192192
*/
193193
@JsonIgnore
194194
public Map<String, Object> getTags() throws FeatureStoreException, IOException {
@@ -212,8 +212,8 @@ public Object getTag(String name) throws FeatureStoreException, IOException {
212212
* Delete a tag of the feature group.
213213
*
214214
* @param name name of the tag to be deleted
215-
* @throws FeatureStoreException FeatureStoreException
216-
* @throws IOException IOException
215+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
216+
* @throws IOException Generic IO exception.
217217
*/
218218
public void deleteTag(String name) throws FeatureStoreException, IOException {
219219
featureGroupBaseEngine.deleteTag(this, name);
@@ -223,8 +223,8 @@ public void deleteTag(String name) throws FeatureStoreException, IOException {
223223
* Update the description of the feature group.
224224
*
225225
* @param description feature group description.
226-
* @throws FeatureStoreException FeatureStoreException
227-
* @throws IOException IOException
226+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
227+
* @throws IOException Generic IO exception.
228228
*/
229229
public void updateDescription(String description) throws FeatureStoreException, IOException {
230230
featureGroupBaseEngine.updateDescription(this, description, this.getClass());
@@ -235,8 +235,8 @@ public void updateDescription(String description) throws FeatureStoreException,
235235
*
236236
* @param featureName Name of the feature
237237
* @param description Description of the feature
238-
* @throws FeatureStoreException FeatureStoreException
239-
* @throws IOException IOException
238+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
239+
* @throws IOException Generic IO exception.
240240
*/
241241
public void updateFeatureDescription(String featureName, String description)
242242
throws FeatureStoreException, IOException {
@@ -250,9 +250,10 @@ public void updateFeatureDescription(String featureName, String description)
250250
* Currently only feature description updates are supported.
251251
*
252252
* @param features List of Feature metadata objects
253-
* @throws FeatureStoreException FeatureStoreException
254-
* @throws IOException IOException
255-
* @throws ParseException ParseException
253+
* @throws FeatureStoreException If Client is not connected to Hopsworks, unable to identify date format and/or
254+
* no commit information was found for this feature group.
255+
* @throws IOException Generic IO exception.
256+
* @throws ParseException In case it's unable to parse date string to date type.
256257
*/
257258
public void updateFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException {
258259
featureGroupBaseEngine.appendFeatures(this, features, this.getClass());
@@ -263,9 +264,10 @@ public void updateFeatures(List<Feature> features) throws FeatureStoreException,
263264
* Currently only feature description updates are supported.
264265
*
265266
* @param feature Feature metadata object
266-
* @throws FeatureStoreException FeatureStoreException
267-
* @throws IOException IOException
268-
* @throws ParseException ParseException
267+
* @throws FeatureStoreException If Client is not connected to Hopsworks, unable to identify date format and/or
268+
* no commit information was found for this feature group.
269+
* @throws IOException Generic IO exception.
270+
* @throws ParseException In case it's unable to parse date string to date type.
269271
*/
270272
public void updateFeatures(Feature feature) throws FeatureStoreException, IOException, ParseException {
271273
featureGroupBaseEngine.appendFeatures(this, Collections.singletonList(feature), this.getClass());
@@ -276,9 +278,10 @@ public void updateFeatures(Feature feature) throws FeatureStoreException, IOExce
276278
* It is only possible to append features to a feature group. Removing features is considered a breaking change.
277279
*
278280
* @param features list of Feature metadata objects
279-
* @throws FeatureStoreException FeatureStoreException
280-
* @throws IOException IOException
281-
* @throws ParseException ParseException
281+
* @throws FeatureStoreException If Client is not connected to Hopsworks, unable to identify date format and/or
282+
* no commit information was found for this feature group.
283+
* @throws IOException Generic IO exception.
284+
* @throws ParseException In case it's unable to parse date string to date type.
282285
*/
283286
public void appendFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException {
284287
featureGroupBaseEngine.appendFeatures(this, new ArrayList<>(features), this.getClass());
@@ -289,9 +292,10 @@ public void appendFeatures(List<Feature> features) throws FeatureStoreException,
289292
* It is only possible to append features to a feature group. Removing features is considered a breaking change.
290293
*
291294
* @param features List of Feature metadata objects
292-
* @throws FeatureStoreException FeatureStoreException
293-
* @throws IOException IOException
294-
* @throws ParseException ParseException
295+
* @throws FeatureStoreException If Client is not connected to Hopsworks, unable to identify date format and/or
296+
* no commit information was found for this feature group.
297+
* @throws IOException Generic IO exception.
298+
* @throws ParseException In case it's unable to parse date string to date type.
295299
*/
296300
public void appendFeatures(Feature features) throws FeatureStoreException, IOException, ParseException {
297301
List<Feature> featureList = new ArrayList<>();
@@ -304,8 +308,8 @@ public void appendFeatures(Feature features) throws FeatureStoreException, IOExc
304308
* Change the `enabled`, `histograms`, `correlations` or `columns` attributes and persist
305309
* the changes by calling this method.
306310
*
307-
* @throws FeatureStoreException FeatureStoreException
308-
* @throws IOException IOException
311+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
312+
* @throws IOException Generic IO exception.
309313
*/
310314
public void updateStatisticsConfig() throws FeatureStoreException, IOException {
311315
featureGroupBaseEngine.updateStatisticsConfig(this, this.getClass());
@@ -315,8 +319,8 @@ public void updateStatisticsConfig() throws FeatureStoreException, IOException {
315319
* Recompute the statistics for the feature group and save them to the feature store.
316320
*
317321
* @return statistics object of computed statistics
318-
* @throws FeatureStoreException FeatureStoreException
319-
* @throws IOException IOException
322+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
323+
* @throws IOException Generic IO exception.
320324
*/
321325
public Statistics computeStatistics() throws FeatureStoreException, IOException {
322326
if (statisticsConfig.getEnabled()) {
@@ -332,8 +336,8 @@ public Statistics computeStatistics() throws FeatureStoreException, IOException
332336
* Get the last statistics commit for the feature group.
333337
*
334338
* @return statistics object of latest commit
335-
* @throws FeatureStoreException FeatureStoreException
336-
* @throws IOException IOException
339+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
340+
* @throws IOException Generic IO exception.
337341
*/
338342
@JsonIgnore
339343
public Statistics getStatistics() throws FeatureStoreException, IOException {
@@ -345,8 +349,8 @@ public Statistics getStatistics() throws FeatureStoreException, IOException {
345349
*
346350
* @param commitTime commit time in the format "YYYYMMDDhhmmss"
347351
* @return statistics object for the commit time
348-
* @throws FeatureStoreException FeatureStoreException
349-
* @throws IOException IOException
352+
* @throws FeatureStoreException If Client is not connected to Hopsworks and/or unable to identify date format.
353+
* @throws IOException Generic IO exception.
350354
*/
351355
@JsonIgnore
352356
public Statistics getStatistics(String commitTime) throws FeatureStoreException, IOException {
@@ -371,8 +375,8 @@ public void unloadSubject() {
371375
*
372376
* @param filter Filter metadata object
373377
* @return Query object
374-
* @throws FeatureStoreException FeatureStoreException
375-
* @throws IOException IOException
378+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
379+
* @throws IOException Generic IO exception.
376380
*/
377381
public Query filter(Filter filter) throws FeatureStoreException, IOException {
378382
return this.selectAll().filter(filter);
@@ -383,8 +387,8 @@ public Query filter(Filter filter) throws FeatureStoreException, IOException {
383387
*
384388
* @param filter Filter metadata object
385389
* @return Query object
386-
* @throws FeatureStoreException FeatureStoreException
387-
* @throws IOException IOException
390+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
391+
* @throws IOException Generic IO exception.
388392
*/
389393
public Query filter(FilterLogic filter) throws FeatureStoreException, IOException {
390394
return this.selectAll().filter(filter);
@@ -395,7 +399,7 @@ public Query filter(FilterLogic filter) throws FeatureStoreException, IOExceptio
395399
*
396400
* @param name feature name
397401
* @return Feature metadata object
398-
* @throws FeatureStoreException FeatureStoreException
402+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
399403
*/
400404
@JsonIgnore
401405
public Feature getFeature(String name) throws FeatureStoreException {

python/hsfs/feature_group.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1574,9 +1574,9 @@ def insert(
15741574
or insert data from a dataframe into the existing feature group.
15751575
15761576
Incrementally insert data to a feature group or overwrite all data contained in the feature group. By
1577-
default, the data is inserted into the offline storag as well as the online storage if the feature group is
1578-
`online_enabled=True`. To insert only into the online storage, set `storage="online"`, or oppositely
1579-
`storage="offline"`.
1577+
default, the data is inserted into the offline storage as well as the online storage if the feature group is
1578+
`online_enabled=True`. To insert only into the online or offline storage, set `storage="online"` or
1579+
`storage="offline"` respectively.
15801580
15811581
The `features` dataframe can be a Spark DataFrame or RDD, a Pandas DataFrame,
15821582
or a two-dimensional Numpy array or a two-dimensional Python nested list.
@@ -1992,7 +1992,7 @@ def commit_delete_record(
19921992
write_options: Optional[Dict[Any, Any]] = {},
19931993
):
19941994
"""Drops records present in the provided DataFrame and commits it as update to this
1995-
Feature group. This method can only be used on time travel enabled feature groups
1995+
Feature group. This method can only be used on time travel enabled feature groups.
19961996
19971997
# Arguments
19981998
delete_df: dataFrame containing records to be deleted.

0 commit comments

Comments
 (0)