Merge branch 'release/v0.2.0'

esa-tu-darmstadt · Nov 19, 2021 · acab827 · acab827
2 parents c3f98eb + 81458c1
commit acab827
Show file tree

Hide file tree

Showing 245 changed files with 38,937 additions and 3,713 deletions.
diff --git a/.gitignore b/.gitignore
@@ -33,3 +33,5 @@ cmake-build-*/
 **/.eggs
 **/*.egg-info
 .vscode/
+
+**/lit.site.cfg.py
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -8,7 +8,7 @@
 
 cmake_minimum_required(VERSION 3.13.4 FATAL_ERROR)
 
-project(spnc VERSION 0.1 LANGUAGES CXX C)
+project(spnc VERSION 0.2.0 LANGUAGES CXX C)
 
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -35,6 +35,10 @@ endif (${CUDA_GPU_SUPPORT})
 
 set(SPNC_CXX_WARNING_LEVEL "-Wall")
 
+option(SPNC_SLP_DEBUG
+        "Enable additional debug output for the SLP vectorizer"
+        OFF)
+
 #
 # clang-tidy setup
 #

diff --git a/CudaToolkit.cmake b/CudaToolkit.cmake
@@ -26,26 +26,4 @@ macro(cuda_setup)
         message(STATUS "Using CUDA runtime library: " ${CUDA_RUNTIME_LIBRARY})
     endif ()
 
-    # When compiling for CUDA GPUs, the generated Kernel must be linked with the
-    # MLIR CUDA runtime wrappers for data-transfer, kernel-launch etc.
-    # When building static libraries, we want to create a complete Python package, and
-    # include the MLIR CUDA wrapper library, which the compiled kernels for the GPU target need
-    # in the Python wheel.
-    find_library(MLIR_CUDA_WRAPPERS cuda-runtime-wrappers HINTS ${LLVM_BUILD_LIBRARY_DIR})
-    if (NOT MLIR_CUDA_WRAPPERS)
-        message(FATAL_ERROR "MLIR CUDA wrappers not found.")
-    else ()
-        # Resolve symbolic link if necessary.
-        if(IS_SYMLINK ${MLIR_CUDA_WRAPPERS})
-            file(READ_SYMLINK ${MLIR_CUDA_WRAPPERS} MLIR_CUDA_SYM)
-            if(NOT IS_ABSOLUTE "${MLIR_CUDA_SYM}")
-                get_filename_component(dir "${MLIR_CUDA_WRAPPERS}" DIRECTORY)
-                set(MLIR_CUDA_WRAPPERS "${dir}/${MLIR_CUDA_SYM}")
-            else()
-                set(MLIR_CUDA_WRAPPERS "${MLIR_CUDA_SYM}")
-            endif()
-        endif()
-        message(STATUS "Using MLIR CUDA wrappers: ${MLIR_CUDA_WRAPPERS}")
-    endif()
-
 endmacro(cuda_setup)
diff --git a/README.md b/README.md
@@ -11,8 +11,8 @@ Starting with release 0.0.4, **SPNC** is mostly implemented in `C++` and uses
 the [LLVM compiler framework](https://llvm.org/)
 and [MLIR](https://mlir.llvm.org) for code generation for the different targets.
 
-Currently supported targets are CPUs (all architectures supported by LLVM, vectorization currently limited to X86) and
-CUDA GPUs.
+Currently supported targets are CPUs (all architectures supported by LLVM, vectorization currently limited to X86 (AVX,
+AVX2, AVX-512) and ARM Neon) and CUDA GPUs.
 
 
 ### Installation ###
@@ -28,8 +28,8 @@ requirements.
 
 In case you want to use **SPNC** on a different platform or want to build **SPNC** from source, follow
 the [installation instructions](https://github.com/esa-tu-darmstadt/spn-compiler/wiki/Installation-Manual) to build
-**SPNC** and all its dependencies from source. Currently, `spnc` is based on LLVM
-commit `f8d3f47e1fd09392aa30df83849b25acd8c59a25`.
+**SPNC** and all its dependencies from source. Currently, `spnc` is based on LLVM release 13 (branch `release/13.x`,
+commit `cf15ccdeb6d5254ee7d46c7535c29200003a3880`).
 
 ### Usage ###
 
@@ -141,3 +141,12 @@ The Installation Manual also contains a section on a
 
 SPNC is licensed under the **Apache License v2.0**, see the *LICENSE* file that was distributed with this source code
 for more information.
+
+### Citation ###
+
+If you use SPNC for your research, please cite the following publication:
+
+Lukas Sommer, Cristian Axenie, Andreas Koch (2022).
+**SPNC: An Open-Source MLIR-Based Compiler for Fast Sum-Product Network Inference on CPUs and GPUs**. In *2022 IEEE/ACM
+International Symposium on Code Generation and Optimization (CGO).*
+[Preprint](https://www.esa.informatik.tu-darmstadt.de/assets/publications/materials/2022/2022_CGO_LS.pdf).
diff --git a/README_SLP.md b/README_SLP.md
@@ -0,0 +1,43 @@
+# SLP-Vectorization #
+
+The SLP directory contains 11 files, each dealing with a different SLP topic.
+
+Please note that a _superword_ in this project describes an SLP vector containing the elements. This term was chosen because the term _vector_ is already overloaded with several meanings in C++.
+
+* CostModel.h
+  * Contains the cost model, which assigns cost to scalar operations, superwords and entire patterns using a visitor pattern.
+
+* GraphConversion.h
+  * A very important file that contains the ConversionManager class. The conversion manager keeps track of created vector operations and extractions, and maintains a ConversionState. The conversion state is responsible for remembering which scalar/superword values have already been computed. The conversion manager is also responsible for gracefully resetting the function state in case an SLP graph is not deemed profitable.
+
+* PatternVisitors.h
+  * Contains the visitor template and the LeafPatternVisitor, which can determine the scalar values that need to be computed for every leaf pattern (e.g. a BroadcastInsertPattern needs a scalar broadcast value and scalar insert values).
+
+* ScoreModel.h
+  * Contains the Look-Ahead-Score score model from the original Look-Ahead SLP publication [[1]](https://dl.acm.org/doi/10.1145/3168807). Also contains the XOR chain model.
+
+* Seeding.h
+  * Contains the classes used for top-down and bottom-up seeding.
+
+* SLPGraph.h
+  * Contains the superword logic and the logic for actual SLP graphs (nodes and multinodes). Note: there is no explicit SLPGraphEdge class or something similar.
+
+* SLPGraphBuilder.h
+  * Contains a graph builder that constructs SLP graphs as described in Porpodas et al. [[1]](https://dl.acm.org/doi/10.1145/3168807).
+
+* SLPPatternMatch.h
+  * Responsible for selecting the best patterns based on the cost model and the current conversion state.
+
+* SLPVectorizationPatterns.h
+  * The individual patterns that can be applied to superwords, along with their match and rewrite logic. They were designed in a fashion somewhat similar to MLIR's pattern rewrite framework.
+
+* Util.h
+  * Some utility functions, such as _vectorizable(...)_ or _commutative(...)_.
+
+### Known Issues ###
+* ShufflePattern: With shuffle patterns enabled, the output of the kernels sometimes does not match the expected output. This might be because the reordering changes the semantics of elements, and the shuffle pattern then accidentally accesses elements whose semantics have changed.
+* The SPN compiler options are replicated inside the util class. This is a little bit annoying.
+
+References
+-----
+[[1] Vasileios Porpodas, Rodrigo C. O. Rocha, and Luís F. W. Góes. 2018. Look-ahead SLP: auto-vectorization in the presence of commutative operations. In Proceedings of the 2018 International Symposium on Code Generation and Optimization (CGO 2018). Association for Computing Machinery, New York, NY, USA, 163–174.](https://dl.acm.org/doi/10.1145/3168807)
diff --git a/compiler/CMakeLists.txt b/compiler/CMakeLists.txt
@@ -23,24 +23,21 @@ capnp_generate_cpp(CAPNP_SRCS CAPNP_HDRS ${CAPNPC_SRC_PREFIX}/xspn/xspn/serializ
 message(STATUS "Capnproto headers: ${CAPNP_HDRS}")
 
 add_library(spnc
-        src/driver/option/Options.cpp
+        src/pipeline/Pipeline.cpp
+        src/option/Options.cpp
         src/Driver.cpp
-        src/driver/util/Logging.cpp
-        src/driver/option/GlobalOptions.cpp
-        src/driver/action/LLVMWriteBitcode.cpp
-        src/driver/action/LLVMStaticCompiler.cpp
-        src/driver/action/EmitObjectCode.cpp
-        src/driver/action/LLVMLinker.cpp
-        src/driver/action/DetectTracingLib.cpp
-        src/driver/action/ClangKernelLinking.cpp
-        src/driver/toolchain/MLIRToolchain.cpp
-        src/driver/toolchain/CPUToolchain.cpp
-        src/codegen/mlir/conversion/HiSPNtoLoSPNConversion.cpp
-        src/codegen/mlir/conversion/LoSPNtoCPUConversion.cpp
-        src/codegen/mlir/conversion/CPUtoLLVMConversion.cpp
-        src/codegen/mlir/conversion/MLIRtoLLVMIRConversion.cpp
-        src/codegen/mlir/transformation/LoSPNTransformations.cpp
-        src/codegen/mlir/frontend/MLIRDeserializer.cpp
+        src/util/Logging.cpp
+        src/option/GlobalOptions.cpp
+        src/pipeline/steps/codegen/EmitObjectCode.cpp
+        src/pipeline/steps/linker/ClangKernelLinking.cpp
+        src/toolchain/MLIRToolchain.cpp
+        src/toolchain/CPUToolchain.cpp
+        src/pipeline/steps/mlir/conversion/HiSPNtoLoSPNConversion.cpp
+        src/pipeline/steps/mlir/conversion/LoSPNtoCPUConversion.cpp
+        src/pipeline/steps/mlir/conversion/CPUtoLLVMConversion.cpp
+        src/pipeline/steps/mlir/conversion/MLIRtoLLVMIRConversion.cpp
+        src/pipeline/steps/mlir/transformation/LoSPNTransformations.cpp
+        src/pipeline/steps/frontend/SPFlowToMLIRDeserializer.cpp
         ${CAPNP_SRCS}
         )
 
@@ -75,16 +72,17 @@ if (${CUDA_GPU_SUPPORT})
 
     # Additional sources for CUDA GPU support
     target_sources(spnc PRIVATE
-            src/codegen/mlir/conversion/GPUtoLLVMConversion.cpp
-            src/codegen/mlir/conversion/LoSPNtoGPUConversion.cpp
-            src/driver/toolchain/CUDAGPUToolchain.cpp
+            src/pipeline/steps/mlir/conversion/CUDA/CUDASerializeToCubin.cpp
+            src/pipeline/steps/mlir/conversion/GPUtoLLVMConversion.cpp
+            src/pipeline/steps/mlir/conversion/LoSPNtoGPUConversion.cpp
+            src/toolchain/CUDAGPUToolchain.cpp
             )
 
     # Additional include directories.
     target_include_directories(spnc PRIVATE ${CUDA_INCLUDE_DIRS})
 
     # Additional libraries to link
-    target_link_libraries(spnc PRIVATE ${CUDA_RUNTIME_LIBRARY} MLIRLoSPNtoGPUTransforms)
+    target_link_libraries(spnc PRIVATE ${CUDA_RUNTIME_LIBRARY} MLIRLoSPNtoGPUTransforms MLIRNVVMToLLVMIRTranslation)
 
     if (MLIR_CUDA_WRAPPERS)
         get_filename_component(MLIR_CUDA_RUNTIME_WRAPPERS_DIR ${MLIR_CUDA_WRAPPERS} DIRECTORY)
@@ -122,12 +120,12 @@ set(mlir_libs
         MLIRTranslation
         MLIRSupport
         MLIRExecutionEngine
-        MLIRTargetNVVMIR
+        MLIRNVVMIR
         )
 
 llvm_map_components_to_libnames(llvm_libs bitwriter native passes)
 target_link_libraries(spnc PRIVATE spnc-common ${llvm_libs} ${mlir_libs} spdlog::spdlog
-        CapnProto::capnp cpu_targetinfo)
+        CapnProto::capnp cpu_targetinfo stdc++fs)
 
 target_compile_options(spnc PRIVATE ${SPNC_CXX_WARNING_LEVEL})
 

diff --git a/compiler/include/driver/Actions.h b/compiler/include/driver/Actions.h