Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace onnx with ggml #22

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 0 additions & 38 deletions .github/workflows/main.ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,6 @@ jobs:
with:
packages: build-essential cmake wget tar

- if: matrix.os == 'ubuntu-20.04'
name: Download ONNXRuntime (Linux)
shell: bash
working-directory: ${{github.workspace}}
run: ./download_onnx_linux_x64.sh

- if: matrix.os == 'windows-latest'
name: Download ONNXRuntime (Windows)
shell: bash
working-directory: ${{github.workspace}}
run: ./download_onnx_windows_x64.sh

- name: Create Build Environment
run: cmake -E make_directory ${{github.workspace}}/build

Expand Down Expand Up @@ -90,13 +78,6 @@ jobs:
name: libaprilasr.dll
path: ${{github.workspace}}/build/Release/libaprilasr.dll

- if: matrix.os == 'windows-latest'
name: Upload onnxruntime.dll
uses: actions/[email protected]
with:
name: onnxruntime.dll
path: ${{github.workspace}}/lib/lib/onnxruntime.dll

- if: matrix.os == 'windows-latest'
name: Upload Python wheel (Windows)
uses: actions/[email protected]
Expand All @@ -111,13 +92,6 @@ jobs:
name: libaprilasr.so
path: ${{github.workspace}}/build/libaprilasr.so

- if: matrix.os == 'ubuntu-20.04'
name: Upload libonnxruntime.so
uses: actions/[email protected]
with:
name: libonnxruntime.so
path: ${{github.workspace}}/lib/lib/libonnxruntime.so

- if: matrix.os == 'ubuntu-20.04'
name: Upload Python wheel (Linux)
uses: actions/[email protected]
Expand Down Expand Up @@ -147,24 +121,12 @@ jobs:
name: libaprilasr.so
path: ${{github.workspace}}/bindings/csharp/nuget/build/lib/linux-x64/

- name: Download Linux onnxruntime
uses: actions/download-artifact@master
with:
name: libonnxruntime.so
path: ${{github.workspace}}/bindings/csharp/nuget/build/lib/linux-x64/

- name: Download Windows aprilasr
uses: actions/download-artifact@master
with:
name: libaprilasr.dll
path: ${{github.workspace}}/bindings/csharp/nuget/build/lib/win-x64/

- name: Download Windows onnxruntime
uses: actions/download-artifact@master
with:
name: onnxruntime.dll
path: ${{github.workspace}}/bindings/csharp/nuget/build/lib/win-x64/

- name: Display structure of downloaded files
run: ls -R
working-directory: ${{github.workspace}}/bindings/csharp/nuget/build/lib
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "src/ggml"]
path = src/ggml
url = https://github.com/ggerganov/ggml.git
96 changes: 11 additions & 85 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,89 +11,9 @@ project(april-asr

string(REPLACE "-" "" PROJECT_NAME_SO ${PROJECT_NAME})

if(WIN32)
set(CMAKE_SHARED_LIBRARY_PREFIX "lib")
set(CMAKE_STATIC_LIBRARY_PREFIX "lib")
endif()
add_subdirectory(src/ggml)

cmake_policy(SET CMP0074 NEW)
if (DEFINED ENV{ONNX_ROOT})
set(ONNXRuntime_ROOT_DIR $ENV{ONNX_ROOT})
else()
set(ONNXRuntime_ROOT_DIR ${PROJECT_SOURCE_DIR}/lib)
endif()

# To statically link, set environment variable like so (example):
# ONNX_STATIC_ROOT=$HOME/Documents/onnxruntime/build/Linux/RelWithDebInfo
if (NOT DEFINED ENV{ONNX_STATIC_ROOT})
find_package(ONNXRuntime REQUIRED)
if(NOT ONNXRuntime_FOUND)
message(FATAL_ERROR "Could not find ONNXRuntime")
endif()

set(april_link_libraries ONNXRuntime::ONNXRuntime)
else()
set(april_link_libraries
$ENV{ONNX_STATIC_ROOT}/libonnxruntime_session.a
$ENV{ONNX_STATIC_ROOT}/libonnxruntime_optimizer.a
$ENV{ONNX_STATIC_ROOT}/libonnxruntime_providers.a
$ENV{ONNX_STATIC_ROOT}/libonnxruntime_framework.a
$ENV{ONNX_STATIC_ROOT}/libonnxruntime_graph.a
$ENV{ONNX_STATIC_ROOT}/libonnxruntime_util.a
$ENV{ONNX_STATIC_ROOT}/libonnxruntime_mlas.a
$ENV{ONNX_STATIC_ROOT}/libonnxruntime_common.a
$ENV{ONNX_STATIC_ROOT}/libonnxruntime_flatbuffers.a
$ENV{ONNX_STATIC_ROOT}/_deps/onnx-build/libonnx.a
$ENV{ONNX_STATIC_ROOT}/_deps/onnx-build/libonnx_proto.a
$ENV{ONNX_STATIC_ROOT}/_deps/protobuf-build/libprotobuf-lite.a
$ENV{ONNX_STATIC_ROOT}/_deps/re2-build/libre2.a
$ENV{ONNX_STATIC_ROOT}/_deps/flatbuffers-build/libflatbuffers.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/base/libabsl_base.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/base/libabsl_throw_delegate.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/container/libabsl_raw_hash_set.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/hash/libabsl_hash.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/hash/libabsl_city.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/hash/libabsl_low_level_hash.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/base/libabsl_raw_logging_internal.a
$ENV{ONNX_STATIC_ROOT}/_deps/google_nsync-build/libnsync_cpp.a
$ENV{ONNX_STATIC_ROOT}/_deps/pytorch_cpuinfo-build/libcpuinfo.a
$ENV{ONNX_STATIC_ROOT}/_deps/pytorch_cpuinfo-build/deps/clog/libclog.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/strings/libabsl_cord.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/strings/libabsl_cordz_info.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/strings/libabsl_cord_internal.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/strings/libabsl_cordz_functions.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/strings/libabsl_cordz_handle.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/container/libabsl_raw_hash_set.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/container/libabsl_hashtablez_sampler.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/profiling/libabsl_exponential_biased.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/synchronization/libabsl_synchronization.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/synchronization/libabsl_graphcycles_internal.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/debugging/libabsl_stacktrace.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/debugging/libabsl_symbolize.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/base/libabsl_malloc_internal.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/debugging/libabsl_debugging_internal.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/debugging/libabsl_demangle_internal.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/time/libabsl_time.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/time/libabsl_civil_time.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/time/libabsl_time_zone.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/hash/libabsl_hash.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/types/libabsl_bad_optional_access.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/strings/libabsl_strings.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/base/libabsl_throw_delegate.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/strings/libabsl_strings_internal.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/types/libabsl_bad_variant_access.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/hash/libabsl_city.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/hash/libabsl_low_level_hash.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/base/libabsl_base.a
$ENV{ONNX_STATIC_ROOT}/_deps/abseil_cpp-build/absl/base/libabsl_spinlock_wait.a
dl
rt
)

set(ORT_INCLUDE_DIR
$ENV{ONNX_STATIC_ROOT}/../../../include/onnxruntime/core/session/
)
endif()

if (DEFINED ENV{APRIL_DEBUG_SAVE_AUDIO})
add_compile_definitions(APRIL_DEBUG_SAVE_AUDIO)
Expand All @@ -107,10 +27,10 @@ set(april_sources
src/proc_thread.c
src/params.c
src/fbank.c
src/ort_util.c
src/file/model_file.c
src/fft/pocketfft.c
src/sonic/sonic.c
src/model_impl.cpp
)

file(GLOB_RECURSE april_headers "*.h")
Expand All @@ -133,13 +53,13 @@ if(NOT WIN32)
list(APPEND april_link_libraries "m")
endif()

include_directories(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${ORT_INCLUDE_DIR})
include_directories(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/src/ggml/include)

add_library(aprilasr_static STATIC ${april_sources})
target_link_libraries(aprilasr_static ${april_link_libraries})
target_link_libraries(aprilasr_static ggml ${april_link_libraries})

add_library(aprilasr SHARED ${april_sources})
target_link_libraries(aprilasr ${april_link_libraries})
target_link_libraries(aprilasr ggml ${april_link_libraries})

set_target_properties(aprilasr PROPERTIES VERSION ${CMAKE_PROJECT_VERSION}
SOVERSION ${PROJECT_VERSION_MAJOR} )
Expand All @@ -151,6 +71,12 @@ target_link_libraries(main PRIVATE aprilasr_static ${april_link_libraries})
add_executable(srt example_srt.cpp)
target_link_libraries(srt PRIVATE aprilasr_static ${april_link_libraries})

add_executable(stress rt_stress.cpp)
target_link_libraries(stress PRIVATE aprilasr_static ${april_link_libraries})

add_executable(quantize src/tools/quantize.cpp)
target_link_libraries(quantize PRIVATE aprilasr_static ${april_link_libraries})

install(TARGETS aprilasr
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME})
Expand Down
41 changes: 4 additions & 37 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,27 +31,6 @@ Currently only one model is available, the [English model](https://april.sapples
To make your own models, check out `extra/exporting-howto.md`

## Building on Linux
Building requires ONNXRuntime v1.13.1. You can either try to build it from source or just download the release binaries.

### Downloading ONNXRuntime
Run `./download_onnx_linux_x64.sh` for linux-x64.

For other platforms the script should be very similar, or visit https://github.com/microsoft/onnxruntime/releases/tag/v1.13.1 and download the right zip/tgz file for your platform and extract the contents to a directory named `lib`.

You may also define the env variable `ONNX_ROOT` containing a path to where you extracted the archive, if placing it in `lib` isn't a choice.

### Building ONNXRuntime from source (untested)
You don't need to do this if you've downloaded ONNXRuntime.

Follow the instructions here: https://onnxruntime.ai/docs/how-to/build/inferencing.html#linux

then run
```
cd build/Linux/RelWithDebInfo/
sudo make install
```

### Building aprilasr
Run:
```
$ mkdir build
Expand All @@ -60,33 +39,21 @@ $ cmake -DCMAKE_BUILD_TYPE=Release ..
$ make -j4
```

You should now have `main`, `libaprilasr.so` and `libaprilasr_static.a`.

If running `main` fails because it can't find `libonnxruntime.so.1.13.1`, you may need to make `libonnxruntime.so.1.13.1` accessible like so:
```
$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/../lib/lib/
```
You should now have `main` and `libaprilasr.so`.

## Building on Windows (msvc)
Create a folder called `lib` in the april-asr folder.

Download [onnxruntime-win-x64-1.13.1.zip](https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-win-x64-1.13.1.zip) and extract the contents of the onnxruntime-win-x64-1.13.1 folder into the `lib` folder.

Run cmake to configure and generate Visual Studio project files. Make sure you select x64 as the target if you have downloaded the x64 version of ONNXRuntime.
Run cmake to configure and generate Visual Studio project files.

Open `ALL_BUILD.vcxproj` in Visual Studio and build it. The output will be in the `Release` or `Debug` folder.

When running main.exe you may receive an error message like this:
> The application was unable to start correctly (0xc000007b)

To fix this, you need to make onnxruntime.dll available. One way to do this is to copy onnxruntime.dll from lib/lib/onnxruntime.dll to build/Debug and build/Release. You may need to distribute the dll together with your application.

## Applications
Currently I'm developing [Live Captions](https://github.com/abb128/LiveCaptions), a Linux desktop app that provides live captioning.

## Acknowledgements
Thanks to the [k2-fsa/icefall](https://github.com/k2-fsa/icefall) contributors for creating the speech recognition recipes and models.

Thanks to the developers of [ggml](https://github.com/ggerganov/ggml) and [llama.cpp](https://github.com/ggerganov/llama.cpp), who have created a performant, lightweight and flexible C tensor library.

This project makes use of a few libraries:
* pocketfft, authored by Martin Reinecke, Copyright (C) 2008-2018 Max-Planck-Society, licensed under BSD-3-Clause
* Sonic library, authored by Bill Cox, Copyright (C) 2010 Bill Cox, licensed under Apache 2.0 license
Expand Down
6 changes: 0 additions & 6 deletions april-docs/src/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,3 @@ For example, the text `"THAT'S COOL ELEPHANTS"` may be represented as tokens lik
* Simply concatenating these strings will give you the correct `" THAT'S COOL ELEPHANTS"`, but with an extra space at the beginning. You may want to strip the final string to avoid the extra space.

Tokens contain more data than just the string however. They also contain the log probability, and a boolean denoting whether or not it's a word boundary. In English, the word boundary value is equivalent to checking if the first character is a space.

## Dependencies

AprilASR depends on ONNXRuntime for ML inference. You will need both libraries for it to work:
* Linux: `libaprilasr.so` and `libonnxruntime.so`
* Windows: `libaprilasr.dll` and `onnxruntime.dll`
2 changes: 1 addition & 1 deletion april_api.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 abb128
* Copyright (C) 2025 abb128
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down
2 changes: 1 addition & 1 deletion bindings/csharp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Open Developer PowerShell for VS 2022 and cd into the nuget directory. Run `.\bu

Make sure you've built libaprilasr.so.

If you've done a build with onnxruntime statically linked to libaprilasr.so, run `./build.sh -s`. Otherwise, run `./build.sh`
Run `./build.sh`


## Example
Expand Down
1 change: 0 additions & 1 deletion bindings/csharp/nuget/build.bat
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
cp ..\..\..\build\Release\libaprilasr.dll .\build\lib\win-x64\
cp ..\..\..\lib\lib\onnxruntime.dll .\build\lib\win-x64\
csc /t:library /out:lib/netstandard2.0/AprilAsr.dll src/*.cs
nuget pack
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ private static void unpackDll(File targetDir, String lib) throws IOException {
File tmpFile = Native.extractFromResourcePath("/win32-x86-64/empty", AprilAsrNative.class.getClassLoader());
File tmpDir = tmpFile.getParentFile();
new File(tmpDir, tmpFile.getName() + ".x").createNewFile();

// Now unpack dependencies
unpackDll(tmpDir, "onnxruntime");
} catch (IOException e) {
// Nothing for now, it will fail on next step
} finally {
Expand Down
9 changes: 0 additions & 9 deletions bindings/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,13 @@
PRECOMP_LIBS = [
"libaprilasr.so",
"Release/libaprilasr.dll",
"../lib/lib/libonnxruntime.so",
"../lib/lib/onnxruntime.dll"
]

for l in PRECOMP_LIBS:
for lib in glob.glob(os.path.join(april_build, l)):
print("Adding library", lib)
shutil.copy(lib, "april_asr")

# Ensure has the correct suffix (e.g. libonnxruntime.so.1.13.1)
for lib in glob.glob("../lib/lib/libonnxruntime.so.*"):
shutil.move(
"april_asr/libonnxruntime.so",
"april_asr/libonnxruntime.so" + lib.split("libonnxruntime.so")[1]
)

# Create OS-dependent, but Python-independent wheels.
try:
from wheel.bdist_wheel import bdist_wheel
Expand Down
Loading
Loading