Skip to content

Commit 04c8187

Browse files
yk5yhliang2018
authored and committed
Exemplify csv handling in serving for boosted_trees model. (tensorflow#4401)
* Exemplify csv handling in serving for boosted_trees model by using custom built signature_def. * some minor touches. * Reverted back to using the file instead of module in the example.
1 parent c519da2 commit 04c8187

File tree

3 files changed

+68
-15
lines changed

3 files changed

+68
-15
lines changed

official/boosted_trees/README.md

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,36 @@ saved_model_cli show --dir /tmp/higgs_boosted_trees_saved_model/${TIMESTAMP}/ \
7474
```
7575

7676
### Inference
77-
Let's use the model to predict the income group of two examples:
77+
Let's use the model to predict the income group of two examples.
78+
Note that this model exports SavedModel with the custom parsing module that accepts csv lines as features. (Each line is an example with 28 columns; be careful to not add a label column, unlike in the training data.)
7879

7980
```
8081
saved_model_cli run --dir /tmp/boosted_trees_higgs_saved_model/${TIMESTAMP}/ \
8182
--tag_set serve --signature_def="predict" \
82-
--input_examples='examples=[{"feature_01":[0.8692932],"feature_02":[-0.6350818],"feature_03":[0.2256903],"feature_04":[0.3274701],"feature_05":[-0.6899932],"feature_06":[0.7542022],"feature_07":[-0.2485731],"feature_08":[-1.0920639],"feature_09":[0.0],"feature_10":[1.3749921],"feature_11":[-0.6536742],"feature_12":[0.9303491],"feature_13":[1.1074361],"feature_14":[1.1389043],"feature_15":[-1.5781983],"feature_16":[-1.0469854],"feature_17":[0.0],"feature_18":[0.6579295],"feature_19":[-0.0104546],"feature_20":[-0.0457672],"feature_21":[3.1019614],"feature_22":[1.3537600],"feature_23":[0.9795631],"feature_24":[0.9780762],"feature_25":[0.9200048],"feature_26":[0.7216575],"feature_27":[0.9887509],"feature_28":[0.8766783]}, {"feature_01":[1.5958393],"feature_02":[-0.6078107],"feature_03":[0.0070749],"feature_04":[1.8184496],"feature_05":[-0.1119060],"feature_06":[0.8475499],"feature_07":[-0.5664370],"feature_08":[1.5812393],"feature_09":[2.1730762],"feature_10":[0.7554210],"feature_11":[0.6431096],"feature_12":[1.4263668],"feature_13":[0.0],"feature_14":[0.9216608],"feature_15":[-1.1904324],"feature_16":[-1.6155890],"feature_17":[0.0],"feature_18":[0.6511141],"feature_19":[-0.6542270],"feature_20":[-1.2743449],"feature_21":[3.1019614],"feature_22":[0.8237606],"feature_23":[0.9381914],"feature_24":[0.9717582],"feature_25":[0.7891763],"feature_26":[0.4305533],"feature_27":[0.9613569],"feature_28":[0.9578179]}]'
83+
--input_exprs='inputs=["0.869293,-0.635082,0.225690,0.327470,-0.689993,0.754202,-0.248573,-1.092064,0.0,1.374992,-0.653674,0.930349,1.107436,1.138904,-1.578198,-1.046985,0.0,0.657930,-0.010455,-0.045767,3.101961,1.353760,0.979563,0.978076,0.920005,0.721657,0.988751,0.876678", "1.595839,-0.607811,0.007075,1.818450,-0.111906,0.847550,-0.566437,1.581239,2.173076,0.755421,0.643110,1.426367,0.0,0.921661,-1.190432,-1.615589,0.0,0.651114,-0.654227,-1.274345,3.101961,0.823761,0.938191,0.971758,0.789176,0.430553,0.961357,0.957818"]'
8384
```
8485

85-
This will print out the predicted classes and class probabilities.
86+
This will print out the predicted classes and class probabilities. Something like:
87+
88+
```
89+
Result for output key class_ids:
90+
[[1]
91+
[0]]
92+
Result for output key classes:
93+
[['1']
94+
['0']]
95+
Result for output key logistic:
96+
[[0.6440273 ]
97+
[0.10902369]]
98+
Result for output key logits:
99+
[[ 0.59288704]
100+
[-2.1007526 ]]
101+
Result for output key probabilities:
102+
[[0.3559727 0.6440273]
103+
[0.8909763 0.1090237]]
104+
```
105+
106+
Please note that "predict" signature_def gives out different (more detailed) results than "classification" or "serving_default".
86107

87108
## Additional Links
88109

official/boosted_trees/train_higgs.py

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ def read_higgs_data(data_dir, train_start, train_count, eval_start, eval_count):
6464
with tf.gfile.Open(npz_filename, "rb") as npz_file:
6565
with np.load(npz_file) as npz:
6666
data = npz["data"]
67-
except Exception as e:
67+
except tf.errors.NotFoundError as e:
6868
raise RuntimeError(
69-
"Error loading data; use data_download.py to prepare the data:\n{}: {}"
69+
"Error loading data; use data_download.py to prepare the data.\n{}: {}"
7070
.format(type(e).__name__, e))
7171
return (data[train_start:train_start+train_count],
7272
data[eval_start:eval_start+eval_count])
@@ -91,6 +91,7 @@ def make_inputs_from_np_arrays(features_np, label_np):
9191
9292
Returns:
9393
input_fn: A function returning a Dataset of feature dict and label.
94+
feature_names: A list of feature names.
9495
feature_column: A list of tf.feature_column.BucketizedColumn.
9596
"""
9697
num_features = features_np.shape[1]
@@ -127,7 +128,7 @@ def input_fn():
127128
return tf.data.Dataset.zip((tf.data.Dataset.from_tensors(features),
128129
tf.data.Dataset.from_tensors(label_np),))
129130

130-
return input_fn, bucketized_columns
131+
return input_fn, feature_names, bucketized_columns
131132

132133

133134
def make_eval_inputs_from_np_arrays(features_np, label_np):
@@ -149,6 +150,31 @@ def input_fn():
149150
return input_fn
150151

151152

153+
def _make_csv_serving_input_receiver_fn(column_names, column_defaults):
  """Returns a serving_input_receiver_fn that parses raw csv lines.

  The input arguments are relevant to `tf.decode_csv()`.

  Args:
    column_names: a list of column names in the order within input csv.
    column_defaults: a list of default values with the same size of
      column_names. Each entity must be either a list of one scalar, or an
      empty list to denote the corresponding column is required.
      e.g. [[""], [2.5], []] indicates the third column is required while
      the first column must be string and the second must be float/double.

  Returns:
    a serving_input_receiver_fn that handles csv for serving.
  """
  def serving_input_receiver_fn():
    # A batch of raw csv lines is the single receiver tensor fed at serving
    # time; each line is decoded into one tensor per configured column.
    csv_lines = tf.placeholder(dtype=tf.string, shape=[None], name="csv")
    parsed_columns = tf.decode_csv(csv_lines, column_defaults)
    features = dict(zip(column_names, parsed_columns))
    return tf.estimator.export.ServingInputReceiver(
        features, {"inputs": csv_lines})

  return serving_input_receiver_fn
176+
177+
152178
def train_boosted_trees(flags_obj):
153179
"""Train boosted_trees estimator on HIGGS data.
154180
@@ -164,9 +190,8 @@ def train_boosted_trees(flags_obj):
164190
flags_obj.eval_start, flags_obj.eval_count)
165191
tf.logging.info("## Data loaded; train: {}{}, eval: {}{}".format(
166192
train_data.dtype, train_data.shape, eval_data.dtype, eval_data.shape))
167-
168193
# Data consists of one label column followed by 28 feature columns.
169-
train_input_fn, feature_columns = make_inputs_from_np_arrays(
194+
train_input_fn, feature_names, feature_columns = make_inputs_from_np_arrays(
170195
features_np=train_data[:, 1:], label_np=train_data[:, 0:1])
171196
eval_input_fn = make_eval_inputs_from_np_arrays(
172197
features_np=eval_data[:, 1:], label_np=eval_data[:, 0:1])
@@ -202,11 +227,14 @@ def train_boosted_trees(flags_obj):
202227
# Benchmark the evaluation results
203228
benchmark_logger.log_evaluation_result(eval_results)
204229

205-
# Exporting the savedmodel.
230+
# Exporting the savedmodel with csv parsing.
206231
if flags_obj.export_dir is not None:
207-
feature_spec = tf.estimator.export.build_parsing_serving_input_receiver_fn(
208-
tf.feature_column.make_parse_example_spec(feature_columns))
209-
classifier.export_savedmodel(flags_obj.export_dir, feature_spec)
232+
classifier.export_savedmodel(
233+
flags_obj.export_dir,
234+
_make_csv_serving_input_receiver_fn(
235+
column_names=feature_names,
236+
# columns are all floats.
237+
column_defaults=[[0.0]] * len(feature_names)))
210238

211239

212240
def main(_):

official/boosted_trees/train_higgs_test.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,13 @@ def test_make_inputs_from_np_arrays(self):
7373
train_data, _ = train_higgs.read_higgs_data(
7474
self.data_dir,
7575
train_start=0, train_count=15, eval_start=15, eval_count=5)
76-
input_fn, feature_columns = train_higgs.make_inputs_from_np_arrays(
77-
features_np=train_data[:, 1:], label_np=train_data[:, 0:1])
76+
(input_fn, feature_names,
77+
feature_columns) = train_higgs.make_inputs_from_np_arrays(
78+
features_np=train_data[:, 1:], label_np=train_data[:, 0:1])
79+
80+
# Check feature_names.
81+
self.assertAllEqual(feature_names,
82+
["feature_%02d" % (i+1) for i in range(28)])
7883

7984
# Check feature columns.
8085
self.assertEqual(28, len(feature_columns))
@@ -86,7 +91,6 @@ def test_make_inputs_from_np_arrays(self):
8691
self.assertIsInstance(feature_column, bucketized_column_type)
8792
# At least 2 boundaries.
8893
self.assertGreaterEqual(len(feature_column.boundaries), 2)
89-
feature_names = ["feature_%02d" % (i+1) for i in range(28)]
9094
# Tests that the source column names of the bucketized columns match.
9195
self.assertAllEqual(feature_names,
9296
[col.source_column.name for col in feature_columns])

0 commit comments

Comments
 (0)