alpharho1
diff --git a/‎binaries/benchmark_helper.cc
+38-3 b/‎binaries/benchmark_helper.cc
+38-3
diff --git a/‎binaries/benchmark_helper.h
+7 b/‎binaries/benchmark_helper.h
+7
diff --git a/‎binaries/caffe2_benchmark.cc
+5 b/‎binaries/caffe2_benchmark.cc
+5
diff --git a/‎binaries/tsv_2_proto.cc
+49 b/‎binaries/tsv_2_proto.cc
+49
diff --git a/‎caffe2/contrib/opencl/context.cc
+1-1 b/‎caffe2/contrib/opencl/context.cc
+1-1
diff --git a/‎caffe2/contrib/prof/profiling_annotations.h
+66-23 b/‎caffe2/contrib/prof/profiling_annotations.h
+66-23
diff --git a/‎caffe2/contrib/prof/profiling_annotations_test.cc
+30-13 b/‎caffe2/contrib/prof/profiling_annotations_test.cc
+30-13
@@ -31,6 +31,7 @@
 #include "observers/observer_config.h"
 #include "observers/perf_observer.h"
 
+using std::map;
 using std::shared_ptr;
 using std::string;
 using std::unique_ptr;
@@ -91,6 +92,7 @@ void setOperatorEngine(caffe2::NetDef* net_def, const string& backend) {
 void loadInput(
     shared_ptr<caffe2::Workspace> workspace,
     const bool run_on_gpu,
+    map<string, caffe2::TensorProtos>& tensor_protos_map,
     const string& input,
     const string& input_file,
     const string& input_dims,
@@ -105,9 +107,11 @@ void loadInput(
           input_files.size(),
           "Input name and file should have the same number.");
       for (int i = 0; i < input_names.size(); ++i) {
-        caffe2::BlobProto blob_proto;
-        CAFFE_ENFORCE(caffe2::ReadProtoFromFile(input_files[i], &blob_proto));
-        workspace->CreateBlob(input_names[i])->Deserialize(blob_proto);
+        caffe2::TensorProtos tensor_protos;
+        CAFFE_ENFORCE(
+            caffe2::ReadProtoFromFile(input_files[i], &tensor_protos));
+        workspace->CreateBlob(input_names[i]);
+        tensor_protos_map.insert(std::make_pair(input_names[i], tensor_protos));
       }
     } else if (input_dims.size() || input_type.size()) {
       CAFFE_ENFORCE_GE(
@@ -176,9 +180,38 @@ void loadInput(
   }
 }
 
+// Fills the workspace input blobs for one benchmark iteration.
+//
+// For each (blob name, TensorProtos) entry of tensor_protos_map, the proto at
+// index iteration % protos_size() is selected, so successive iterations cycle
+// through the loaded protos. Its first element is written at index 0 of a CPU
+// tensor held by the blob of that name; the blob is created when the
+// workspace does not have it yet. Only STRING and FLOAT protos are copied;
+// for any other data type the tensor is resized but left unfilled.
+//
+// Fails a CAFFE_ENFORCE when iteration is negative, when an entry holds no
+// protos, or when the selected proto has no element of its declared type.
+void fillInputBlob(
+    shared_ptr<caffe2::Workspace> workspace,
+    map<string, caffe2::TensorProtos>& tensor_protos_map,
+    int iteration) {
+  CAFFE_ENFORCE(
+      iteration >= 0, "Iteration must not be negative, got ", iteration, ".");
+
+  for (auto& tensor_kv : tensor_protos_map) {
+    const string& blob_name = tensor_kv.first;
+    const caffe2::TensorProtos& tensor_protos = tensor_kv.second;
+
+    // Guard the modulo below: an entry without protos would divide by zero.
+    const int protos_size = tensor_protos.protos_size();
+    CAFFE_ENFORCE(
+        protos_size > 0,
+        "No tensor protos were loaded for input ",
+        blob_name,
+        ".");
+    const caffe2::TensorProto& tensor_proto =
+        tensor_protos.protos(iteration % protos_size);
+
+    caffe2::Blob* blob = workspace->GetBlob(blob_name);
+    if (blob == nullptr) {
+      blob = workspace->CreateBlob(blob_name);
+    }
+    // todo: support gpu and make this function a template
+    caffe2::TensorCPU* tensor = blob->GetMutable<caffe2::TensorCPU>();
+    // The tensor gets an empty dims vector; one element is written at index 0.
+    tensor->Resize(std::vector<caffe2::TIndex>());
+    if (tensor_proto.data_type() == caffe2::TensorProto::STRING) {
+      CAFFE_ENFORCE(
+          tensor_proto.string_data_size() > 0,
+          "Input ",
+          blob_name,
+          " has a STRING proto without string data.");
+      (tensor->mutable_data<std::string>())[0] = tensor_proto.string_data(0);
+    } else if (tensor_proto.data_type() == caffe2::TensorProto::FLOAT) {
+      CAFFE_ENFORCE(
+          tensor_proto.float_data_size() > 0,
+          "Input ",
+          blob_name,
+          " has a FLOAT proto without float data.");
+      (tensor->mutable_data<float>())[0] = tensor_proto.float_data(0);
+    }
+    // todo: for other types
+  }
+}
+
 void runNetwork(
     shared_ptr<caffe2::Workspace> workspace,
     caffe2::NetDef& net_def,
+    map<string, caffe2::TensorProtos>& tensor_protos_map,
     const bool wipe_cache,
     const bool run_individual,
     const int warmup,
@@ -194,6 +227,7 @@ void runNetwork(
   caffe2::ObserverConfig::initSampleRate(1, 1, 1, run_individual, warmup);
   LOG(INFO) << "Running warmup runs.";
   for (int i = 0; i < warmup; ++i) {
+    fillInputBlob(workspace, tensor_protos_map, i);
     CAFFE_ENFORCE(net->Run(), "Warmup run ", i, " has failed.");
   }
 
@@ -208,6 +242,7 @@ void runNetwork(
       ".");
   for (int i = 0; i < iter; ++i) {
     caffe2::ObserverConfig::initSampleRate(1, 1, 1, 0, warmup);
+    fillInputBlob(workspace, tensor_protos_map, i);
     CAFFE_ENFORCE(net->Run(), "Main run ", i, " has failed.");
     if (wipe_cache) {
       caffe2::wipe_cache();
 
@@ -24,6 +24,7 @@
 #include "caffe2/core/operator.h"
 #include "caffe2/utils/string_utils.h"
 
+using std::map;
 using std::shared_ptr;
 using std::string;
 using std::vector;
@@ -73,10 +74,15 @@ void setOperatorEngine(caffe2::NetDef*, const string&);
 void loadInput(
     shared_ptr<caffe2::Workspace>,
     const bool,
+    map<string, caffe2::TensorProtos>&,
     const string&,
     const string&,
     const string&,
     const string&);
+void fillInputBlob(
+    shared_ptr<caffe2::Workspace>,
+    map<string, caffe2::TensorProtos>&,
+    int iteration);
 void writeOutput(
     shared_ptr<caffe2::Workspace>,
     const bool,
@@ -86,6 +92,7 @@ void writeOutput(
 void runNetwork(
     shared_ptr<caffe2::Workspace>,
     caffe2::NetDef&,
+    map<string, caffe2::TensorProtos>&,
     const bool,
     const bool,
     const int,
 
@@ -5,6 +5,7 @@
 #include "binaries/benchmark_helper.h"
 
 using std::make_shared;
+using std::map;
 using std::string;
 using std::vector;
 
@@ -96,9 +97,12 @@ int main(int argc, char** argv) {
   setDeviceType(&net_def, run_dev);
   setOperatorEngine(&net_def, caffe2::FLAGS_backend);
 
+  map<string, caffe2::TensorProtos> tensor_protos_map;
+
   loadInput(
       workspace,
       run_on_gpu,
+      tensor_protos_map,
       caffe2::FLAGS_input,
       caffe2::FLAGS_input_file,
       caffe2::FLAGS_input_dims,
@@ -107,6 +111,7 @@ int main(int argc, char** argv) {
   runNetwork(
       workspace,
       net_def,
+      tensor_protos_map,
       caffe2::FLAGS_wipe_cache,
       caffe2::FLAGS_run_individual,
       caffe2::FLAGS_warmup,
 
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <sstream>
+#include <string>
+
+#include "caffe2/core/blob_serialization.h"
+#include "caffe2/core/db.h"
+#include "caffe2/core/init.h"
+#include "caffe2/core/logging.h"
+#include "caffe2/proto/caffe2.pb.h"
+#include "caffe2/utils/proto_utils.h"
+
+CAFFE2_DEFINE_string(f_in, "", "The input data file name.");
+CAFFE2_DEFINE_string(f_out, "", "The output data file name.");
+
+// Converts a text file into a serialized caffe2::TensorProtos message.
+//
+// Every line read from --f_in becomes one STRING TensorProto named "text"
+// whose only string_data entry is that line. The serialized message is then
+// written to --f_out. Fails a CAFFE_ENFORCE when either file cannot be
+// opened, when serialization fails, or when the write fails.
+int main(int argc, char** argv) {
+  caffe2::GlobalInit(&argc, &argv);
+
+  std::ifstream f_in(caffe2::FLAGS_f_in);
+  CAFFE_ENFORCE(
+      f_in.is_open(), "Cannot open input file: ", caffe2::FLAGS_f_in);
+
+  caffe2::TensorProtos tensor_protos;
+  std::string line;
+  while (std::getline(f_in, line)) {
+    caffe2::TensorProto* data = tensor_protos.add_protos();
+    data->set_data_type(caffe2::TensorProto::STRING);
+    // NOTE(review): a single dimension of size 0 is recorded although one
+    // string is stored - confirm that consumers do not rely on dims.
+    data->add_dims(0);
+    data->add_string_data(line);
+    data->set_name("text");
+  }
+  f_in.close();
+
+  std::string output_str;
+  CAFFE_ENFORCE(
+      tensor_protos.SerializeToString(&output_str),
+      "Failed to serialize the tensor protos.");
+
+  // The output is opened only after the input was read, so a missing input
+  // no longer truncates an existing output file. The payload is a binary
+  // protobuf, hence the binary open mode.
+  std::ofstream f_out(caffe2::FLAGS_f_out, std::ios::binary);
+  CAFFE_ENFORCE(
+      f_out.is_open(), "Cannot open output file: ", caffe2::FLAGS_f_out);
+  f_out << output_str;
+  f_out.close();
+  CAFFE_ENFORCE(
+      !f_out.fail(), "Failed to write output file: ", caffe2::FLAGS_f_out);
+  return 0;
+}
@@ -27,7 +27,7 @@ OpenCLContextSingleton::OpenCLContextSingleton() {
   }
   device = devices[device_id];
 
-  context  = cl::Context({device});
+  context = cl::Context({device});
   queue = cl::CommandQueue(context, device);
 }
 
 
@@ -3,54 +3,97 @@
 #pragma once
 
 #include "caffe2/contrib/prof/prof_dag_net.h"
-#include "caffe2/core/nomnigraph/include/nomnigraph/Representations/NeuralNet.h"
-
-using nom::repr::Annotation;
 
 namespace caffe2 {
+namespace contrib {
+namespace prof {
 
-// Annotations used when profiling a NeuralNetOperator.
-class ProfilingOperatorAnnotation : public Annotation {
+// Accumulates data points and generates a two-number summary: mean and
+// population standard deviation.
+//
+// Only the running sum, the running sum of squares and the sample count are
+// kept. The sums are held in double precision so that the subtraction in
+// getStddev() loses less accuracy than it would with float accumulators.
+class TwoNumberStats {
+ public:
+  TwoNumberStats() : sum_(0), squareSum_(0), count_(0) {}
+  // To prepopulate state of the TwoNumberStats accumulator from a summary
+  // (mean, population stddev, sample count).
+  TwoNumberStats(float mean, float stddev, int count)
+      : sum_(static_cast<double>(mean) * count),
+        squareSum_(
+            (static_cast<double>(stddev) * stddev +
+             static_cast<double>(mean) * mean) *
+            count),
+        count_(count) {}
+  // Adds one data point to the accumulator.
+  void addPoint(float point) {
+    const double value = point;
+    sum_ += value;
+    squareSum_ += value * value;
+    count_++;
+  }
+  // Returns the mean of the accumulated points, or 0 when there are none.
+  float getMean() const {
+    if (count_ == 0) {
+      return 0;
+    }
+    return static_cast<float>(sum_ / count_);
+  }
+  // Returns population stddev, or 0 when there are no points.
+  float getStddev() const {
+    if (count_ == 0) {
+      return 0;
+    }
+    // Work in double: count_ * count_ overflows int once count_ > 46340.
+    const double n = count_;
+    const double variance = (n * squareSum_ - sum_ * sum_) / (n * n);
+    // Rounding can push the variance of near-identical points slightly below
+    // zero; clamp it so the result is 0 instead of NaN.
+    // NOTE(review): sqrt is assumed to come from a header included
+    // transitively (<cmath> / <math.h>) - confirm.
+    return static_cast<float>(sqrt(variance < 0 ? 0 : variance));
+  }
+
+ private:
+  // Sum of data points.
+  double sum_;
+  // Sum of square of data points.
+  double squareSum_;
+  // Sample count.
+  int count_;
+};
+
+// Annotations used when profiling a NeuralNetOperator.
+class ProfilingOperatorAnnotation {
+ public:
+  ProfilingOperatorAnnotation() {}
+  explicit ProfilingOperatorAnnotation(const ProfDAGProto& stats_proto)
+      : execution_time_ms_(
+            stats_proto.execution_time().mean(),
+            stats_proto.execution_time().stddev(),
+            stats_proto.execution_time().count()) {}
+  ProfilingOperatorAnnotation(ProfilingOperatorAnnotation&&) = default;
   // Accessors
-  const Stats& execution_time_ms() const {
+  const TwoNumberStats& getExecutionTimeMs() const {
     return execution_time_ms_;
   }
-  Stats* mutable_execution_time_ms() {
+  TwoNumberStats* getMutableExecutionTimeMs() {
     return &execution_time_ms_;
   }
 
  private:
   // Statistics for how long this op took to execute.
-  Stats execution_time_ms_;
+  TwoNumberStats execution_time_ms_;
 };
 
-// Annotations used when profiling a NeuralNetData.
-class ProfilingDataAnnotation : public Annotation {
+// Annotations used when profiling a NeuralNetData. Data this class
+// stores is translatable to/from BlobProfile. Note: translation
+// may be lossy due to use of floating point arithmetic.
+class ProfilingDataAnnotation {
  public:
-  ProfilingDataAnnotation() : Annotation(AnnotationKind::ProfilingData) {}
-  // LLVM-style RTTI implementation.
-  static bool classof(const Annotation* annotation) {
-    return annotation->getKind() == AnnotationKind::ProfilingData;
-  }
+  ProfilingDataAnnotation() {}
+  explicit ProfilingDataAnnotation(const BlobProfile& profile)
+      : used_bytes_(
+            profile.bytes_used().mean(),
+            profile.bytes_used().stddev(),
+            profile.bytes_used().count()) {}
+  ProfilingDataAnnotation(ProfilingDataAnnotation&&) = default;
   // Accessors
-  const Stats& used_bytes() const {
+  const TwoNumberStats& getUsedBytes() const {
     return used_bytes_;
   }
-  Stats* mutable_used_bytes() {
+  TwoNumberStats* getMutableUsedBytes() {
     return &used_bytes_;
   }
 
  private:
   // Statistics for how much data this tensor/parameter used (per invocation of
   // the op that generated the data).
-  Stats used_bytes_;
+  TwoNumberStats used_bytes_;
 };
 
+} // namespace prof
+} // namespace contrib
 } // namespace caffe2
@@ -4,24 +4,41 @@
 #include <gtest/gtest.h>
 
 namespace caffe2 {
+namespace contrib {
+namespace prof {
 namespace {
 
-TEST(ProfilingAnnotationsTest, BasicAccess) {
+TEST(TwoNumberStatsTest, ComputeAndGetOpStatsSummary) {
+  // Accumulate two data points, 2 and 3, then check the summary.
+  TwoNumberStats stats;
+  stats.addPoint(2);
+  stats.addPoint(3);
+  EXPECT_FLOAT_EQ(2.5, stats.getMean());
+  // Population standard deviation.
+  EXPECT_FLOAT_EQ(0.5, stats.getStddev());
+}
+
+TEST(TwoNumberStatsTest, TestRestore) {
+  // Rebuild the accumulator from the summary (mean, stddev, count) of the
+  // points 2 and 3 used in the test above.
+  TwoNumberStats stats(2.5, 0.5, 2);
+  // Expect that restore&recompute is still the same.
+  EXPECT_FLOAT_EQ(2.5, stats.getMean());
+  // Population standard deviation.
+  EXPECT_FLOAT_EQ(0.5, stats.getStddev());
+}
+
+TEST(ProfilingAnnotationsTest, BasicAccessToActiveData) {
   ProfilingOperatorAnnotation op_annotation;
-  op_annotation.mutable_execution_time_ms()->sum = 5;
+  op_annotation.getMutableExecutionTimeMs()->addPoint(5);
+  EXPECT_EQ(5, op_annotation.getExecutionTimeMs().getMean());
+
   ProfilingDataAnnotation data_annotation;
-  data_annotation.mutable_used_bytes()->sum = 7;
-  auto* op_annotation_ptr =
-      dyn_cast<ProfilingOperatorAnnotation>(&op_annotation);
-  ASSERT_NE(nullptr, op_annotation_ptr);
-  EXPECT_EQ(5, op_annotation_ptr->execution_time_ms().sum);
-  auto* data_annotation_ptr =
-      dyn_cast<ProfilingDataAnnotation>(&data_annotation);
-  ASSERT_NE(nullptr, data_annotation_ptr);
-  EXPECT_EQ(7, data_annotation_ptr->used_bytes().sum);
-  EXPECT_EQ(nullptr, dyn_cast<ProfilingOperatorAnnotation>(&data_annotation));
-  EXPECT_EQ(nullptr, dyn_cast<ProfilingDataAnnotation>(&op_annotation));
+  data_annotation.getMutableUsedBytes()->addPoint(7);
+  EXPECT_EQ(7, data_annotation.getUsedBytes().getMean());
 }
 
 } // namespace
+} // namespace prof
+} // namespace contrib
 } // namespace caffe2
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,7 @@ OpenCLContextSingleton::OpenCLContextSingleton() {`
`27`	`27`	`}`
`28`	`28`	`device = devices[device_id];`
`29`	`29`
`30`		`- context = cl::Context({device});`
	`30`	`+ context = cl::Context({device});`
`31`	`31`	`queue = cl::CommandQueue(context, device);`
`32`	`32`	`}`
`33`	`33`