AIS cluster provides out-of-the-box integration with the TensorFlow TFRecord format:
- Creating TensorFlow datasets from TFRecords stored in an AIS cluster with the
  `tf.data.TFRecordDataset` API. See the S3 compatibility docs.
- Creating TensorFlow datasets from TAR files stored in an AIS cluster with the
  `tf.data.TFRecordDataset` API. The conversion is executed remotely, on the fly, in the cluster.
# Example 1: create a TensorFlow dataset from TFRecord files stored in the cluster.
import tensorflow as tf
import os

# CLUSTER_ENDPOINT is defined elsewhere (presumably the cluster's
# S3-compatible endpoint -- confirm against your deployment).
os.environ["S3_ENDPOINT"] = CLUSTER_ENDPOINT
# (...)
train_dataset = tf.data.TFRecordDataset(filenames=[
    "s3://tf/train-1.tfrecord",
    "s3://tf/train-2.tfrecord",
]).map(record_parser).batch(BATCH_SIZE)
# (...)
model.fit(train_dataset, ...)


# Example 2: create a TensorFlow dataset from TAR files stored in the cluster.
import tensorflow as tf
import os

# CLUSTER_ENDPOINT is defined elsewhere (presumably the cluster's
# S3-compatible endpoint -- confirm against your deployment).
os.environ["S3_ENDPOINT"] = CLUSTER_ENDPOINT
# (...)
# The `?uuid=...` query parameter names the TensorFlow transformer used to
# convert each TAR file; replace the placeholder with that transformer's UUID.
train_dataset = tf.data.TFRecordDataset(filenames=[
    "s3://tf/train-1.tar?uuid=<uuid of tensorflow transformer>",
    "s3://tf/train-2.tar?uuid=<uuid of tensorflow transformer>",
]).map(record_parser).batch(BATCH_SIZE)
# (...)
model.fit(train_dataset, ...)