HandsOnOpenCL · azatsman · Jan 2, 2017 · Jan 2, 2017 · Aug 7, 2017 · Aug 8, 2017
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,7 @@
 
 # C
 *.o
+*~
 
 # Produced binarys
 Exercises/Exercise01/C/DeviceInfo

diff --git a/Solutions/Exercise08/C/matmul.c b/Solutions/Exercise08/C/matmul.c
@@ -34,6 +34,8 @@ int main(int argc, char *argv[])
     float *h_A;             // A matrix
     float *h_B;             // B matrix
     float *h_C;             // C = A*B matrix
+    float *C0;              // Result computed sequentially on the host for
+                            //   later error checking.
     int N;                  // A[N][N], B[N][N], C[N][N]
     int size;               // number of elements in each matrix
 
@@ -58,7 +60,7 @@ int main(int argc, char *argv[])
     h_A = (float *)malloc(size * sizeof(float));
     h_B = (float *)malloc(size * sizeof(float));
     h_C = (float *)malloc(size * sizeof(float));
-
+    C0  = (float *)malloc(size * sizeof(float));
 
 
 //--------------------------------------------------------------------------------
@@ -105,10 +107,10 @@ int main(int argc, char *argv[])
         zero_mat(N, h_C);
         start_time = wtime();
 
-        seq_mat_mul_sdot(N, h_A, h_B, h_C);
+        seq_mat_mul_sdot(N, h_A, h_B, C0);
 
         run_time  = wtime() - start_time;
-        results(N, h_C, run_time);
+        results(N, C0, C0, run_time);
     }
 
 //--------------------------------------------------------------------------------
@@ -195,7 +197,7 @@ int main(int argc, char *argv[])
             0, NULL, NULL);
         checkError(err, "Reading back d_c");
 
-        results(N, h_C, run_time);
+        results(N, h_C, C0, run_time);
 
     } // end for loop
 
@@ -262,7 +264,7 @@ int main(int argc, char *argv[])
             0, NULL, NULL);
         checkError(err, "Reading back d_c");
 
-        results(N, h_C, run_time);
+        results(N, h_C, C0, run_time);
 
     } // end for loop
 
@@ -331,7 +333,7 @@ int main(int argc, char *argv[])
             0, NULL, NULL);
         checkError(err, "Reading back d_c");
 
-        results(N, h_C, run_time);
+        results(N, h_C, C0, run_time);
 
     } // end for loop
 
@@ -401,7 +403,7 @@ int main(int argc, char *argv[])
             0, NULL, NULL);
         checkError(err, "Reading back d_c");
 
-        results(N, h_C, run_time);
+        results(N, h_C, C0, run_time);
 
     } // end for loop
 
@@ -478,7 +480,7 @@ int main(int argc, char *argv[])
             0, NULL, NULL);
         checkError(err, "Reading back d_c");
 
-        results(N, h_C, run_time);
+        results(N, h_C, C0, run_time);
 
     } // end for loop
 
@@ -489,6 +491,7 @@ int main(int argc, char *argv[])
     free(h_A);
     free(h_B);
     free(h_C);
+    free(C0);
     clReleaseMemObject(d_a);
     clReleaseMemObject(d_b);
     clReleaseMemObject(d_c);

diff --git a/Solutions/Exercise08/C/matrix_lib.c b/Solutions/Exercise08/C/matrix_lib.c
@@ -47,21 +47,23 @@ void seq_mat_mul_sdot(int N, float *A, float *B, float *C)
 //------------------------------------------------------------------------------
 void initmat(int N, float *A, float *B, float *C)
 {
-    int i, j;
+  int i, j;
 
-    /* Initialize matrices */
+  /* Initialize matrices */
 
-	for (i = 0; i < N; i++)
-		for (j = 0; j < N; j++)
-			A[i*N+j] = AVAL;
+  int vv = 1;
 
-	for (i = 0; i < N; i++)
-		for (j = 0; j < N; j++)
-			B[i*N+j] = BVAL;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      A[i*N+j] = (float) ((vv++) % 17);
 
-	for (i = 0; i < N; i++)
-		for (j = 0; j < N; j++)
-			C[i*N+j] = 0.0f;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      B[i*N+j] = (float) ((vv++) % 11);
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      C[i*N+j] = (float) ((vv++) % 19);
 }
 
 //------------------------------------------------------------------------------
@@ -97,35 +99,34 @@ void trans(int N, float *B, float *Btrans)
 //  Function to compute errors of the product matrix
 //
 //------------------------------------------------------------------------------
-float error(int N, float *C)
+float error(int N, float *C1, float *C2)
 {
-   int i,j;
-   float cval, errsq, err;
-   cval = (float) N * AVAL * BVAL;
-   errsq = 0.0f;
-
-    for (i = 0; i < N; i++) {
-        for (j = 0; j < N; j++) {
-            err = C[i*N+j] - cval;
-            errsq += err * err;
-        }
+  int i,j;
+  float errsq, err;
+  // Sum of squared element-wise differences between C1 and C2 (N*N entries).
+  errsq = 0.0f;
+  for   (i = 0; i < N; i++) {
+    for (j = 0; j < N; j++) {
+      err = C1[i*N+j] - C2[i*N+j];
+      errsq += err * err;
     }
-    return errsq;
+  }
+  return errsq;
 }
 
 //------------------------------------------------------------------------------
 //
 //  Function to analyze and output results
 //
 //------------------------------------------------------------------------------
-void results(int N, float *C, double run_time)
+void results(int N, float *C1, float *C2, double run_time)
 {
     float mflops;
     float errsq;
 
     mflops = 2.0 * N * N * N/(1000000.0f * run_time);
     printf(" %.2f seconds at %.1f MFLOPS \n",  run_time,mflops);
-    errsq = error(N, C);
+    errsq = error(N, C1, C2);
     if (isnan(errsq) || errsq > TOL) {
         printf("\n Errors in multiplication: %f\n",errsq);
         exit(1);

diff --git a/Solutions/Exercise08/C/matrix_lib.h b/Solutions/Exercise08/C/matrix_lib.h
@@ -46,14 +46,14 @@ void trans(int N, float *B, float *Btrans);
 //  Function to compute errors of the product matrix
 //
 //------------------------------------------------------------------------------
-float error(int N, float *C);
+float error(int N, float *C1, float *C2);
 
 
 //------------------------------------------------------------------------------
 //
 //  Function to analyze and output results 
 //
 //------------------------------------------------------------------------------
-void results(int N, float *C, double run_time);
+void results(int N, float *C1, float *C2, double run_time);
 
 #endif
diff --git a/Solutions/Exercise08/Cpp/matmul.cpp b/Solutions/Exercise08/Cpp/matmul.cpp
@@ -45,9 +45,13 @@ int main(int argc, char *argv[])
     std::vector<float> h_A(size); // Host memory for Matrix A
     std::vector<float> h_B(size); // Host memory for Matrix B
     std::vector<float> h_C(size); // Host memory for Matrix C
+    std::vector<float> C0 (size); // Result computed sequentially on the host for
+                                  //   later error checking.
 
     cl::Buffer d_a, d_b, d_c;   // Matrices in device memory
 
+
+
 //--------------------------------------------------------------------------------
 // Create a context and queue
 //--------------------------------------------------------------------------------
@@ -84,18 +88,18 @@ int main(int argc, char *argv[])
 // Run sequential matmul
 //--------------------------------------------------------------------------------
 
-        initmat(N, h_A, h_B, h_C);
+        initmat(N, h_A, h_B, C0);
 
         printf("\n===== Sequential, matrix mult (dot prod), order %d on host CPU ======\n",ORDER);
         for(int i = 0; i < COUNT; i++)
         {
             zero_mat(N, h_C);
             start_time = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;
 
-            seq_mat_mul_sdot(N, h_A, h_B, h_C);
+            seq_mat_mul_sdot(N, h_A, h_B, C0);
 
             run_time  = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0 - start_time;
-            results(N, h_C, run_time);
+            results(N, C0, C0, run_time);
         }
 
 //--------------------------------------------------------------------------------
@@ -144,7 +148,7 @@ int main(int argc, char *argv[])
 
             cl::copy(queue, d_c, h_C.begin(), h_C.end());
 
-            results(N, h_C, run_time);
+            results(N, h_C, C0, run_time);
 
         } // end for loop
 
@@ -177,7 +181,7 @@ int main(int argc, char *argv[])
 
             cl::copy(queue, d_c, h_C.begin(), h_C.end());
 
-            results(N, h_C, run_time);
+            results(N, h_C, C0, run_time);
 
         } // end for loop
 
@@ -211,7 +215,7 @@ int main(int argc, char *argv[])
 
             cl::copy(queue, d_c, h_C.begin(), h_C.end());
 
-            results(N, h_C, run_time);
+            results(N, h_C, C0, run_time);
 
         } // end for loop
 
@@ -248,7 +252,7 @@ int main(int argc, char *argv[])
 
             cl::copy(queue, d_c, h_C.begin(), h_C.end());
 
-            results(N, h_C, run_time);
+            results(N, h_C, C0, run_time);
 
         } // end for loop
 
@@ -297,7 +301,7 @@ int main(int argc, char *argv[])
 
             cl::copy(queue, d_c, h_C.begin(), h_C.end());
 
-            results(N, h_C, run_time);
+            results(N, h_C, C0, run_time);
 
         } // end for loop
     } catch (cl::Error err)

diff --git a/Solutions/Exercise08/Cpp/matrix_lib.cpp b/Solutions/Exercise08/Cpp/matrix_lib.cpp
@@ -43,26 +43,34 @@ void seq_mat_mul_sdot(int N, std::vector<float>& A, std::vector<float>& B, std::
 
 //------------------------------------------------------------------------------
 //
-//  Function to initialize the input matrices A and B
+//  Function to initialize the input matrices A and B.
+//  Matrices are initialized to small but non-constant values. The values need
+//    to be relatively small to avoid single-precision floating point errors
+//    in long sums.
 //
 //------------------------------------------------------------------------------
+
+
+
 void initmat(int N, std::vector<float>& A, std::vector<float>& B, std::vector<float>& C)
 {
-    int i, j;
+  int i, j;
 
-    /* Initialize matrices */
+  /* Initialize matrices */
 
-    for (i = 0; i < N; i++)
-        for (j = 0; j < N; j++)
-            A[i*N+j] = AVAL;
+  int vv = 1;
 
-    for (i = 0; i < N; i++)
-        for (j = 0; j < N; j++)
-            B[i*N+j] = BVAL;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      A[i*N+j] = static_cast<float>((vv++) % 17);
 
-    for (i = 0; i < N; i++)
-        for (j = 0; j < N; j++)
-            C[i*N+j] = 0.0f;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      B[i*N+j] = static_cast<float>((vv++) % 11);
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      C[i*N+j] = static_cast<float>((vv++) % 19);
 }
 
 //------------------------------------------------------------------------------
@@ -98,6 +106,7 @@ void trans(int N, std::vector<float>& B, std::vector<float>& Btrans)
 //  Function to compute errors of the product matrix
 //
 //------------------------------------------------------------------------------
+
 float error(int N, std::vector<float>& C)
 {
    int i,j;
@@ -114,6 +123,23 @@ float error(int N, std::vector<float>& C)
     return errsq;
 }
 
+// Compare two matrices, which are expected to be identical, and return the error:
+
+float error (int N, std::vector<float>& C1, std::vector<float>& C2)
+{
+  int i,j;
+  float errsq, err;
+  // Sum of squared element-wise differences between C1 and C2 (N*N entries).
+  errsq = 0.0f;
+  for   (i = 0; i < N; i++) {
+    for (j = 0; j < N; j++) {
+      err = C1[i*N+j] - C2[i*N+j];
+      errsq += err * err;
+    }
+  }
+  return errsq;
+}
+
 //------------------------------------------------------------------------------
 //
 //  Function to analyze and output results
@@ -132,3 +158,15 @@ void results(int N, std::vector<float>& C, double run_time)
            printf("\n Errors in multiplication: %f\n",errsq);
 }
 
+// Print timing/MFLOPS and report the error between two matrices:
+
+void results (int N, std::vector<float>& C1, std::vector<float>& C2, double run_time)
+{
+  // Throughput figure counts 2*N*N*N floating-point operations per product.
+  float mflops = 2.0 * N * N * N/(1000000.0f * run_time);
+  printf(" %.2f seconds at %.1f MFLOPS \n",  run_time,mflops);
+  // Squared-error sum between C1 and C2; a NaN result is also reported.
+  float errsq = error(N, C1, C2);
+  if (std::isnan(errsq) || errsq > TOL)
+    printf("\n Errors in multiplication: %f\n",errsq);
+}
diff --git a/Solutions/Exercise08/Cpp/matrix_lib.hpp b/Solutions/Exercise08/Cpp/matrix_lib.hpp
@@ -55,6 +55,9 @@ float error(int N, std::vector<float>& C);
 //  Function to analyze and output results 
 //
 //------------------------------------------------------------------------------
+
 void results(int N, std::vector<float>& C, double run_time);
 
+void results(int N, std::vector<float>& C1, std::vector<float>& C2, double run_time);
+
 #endif
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,6 +4,7 @@ @@
     # C
     *.o
+    *~
     # Produced binarys
     Exercises/Exercise01/C/DeviceInfo
@@ Expand Down @@