`.github/workflows/deploy.yml`:

```yaml
name: Deploy
on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # To push a branch
      pull-requests: write  # To create a PR from that branch
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install doxygen
        uses: awalsh128/cache-apt-pkgs-action@v1.4.3
        with:
          packages: doxygen

      - name: Install pdoc3
        run: python3 -m pip install pdoc3

      - name: Build C# API reference
        working-directory: ${{github.workspace}}/april-docs/src/csharp
        run: doxygen Doxyfile

      - name: Build Python API reference
        working-directory: ${{github.workspace}}/april-docs
        run: |
          GH_DOCS_CI_DONT_LOAD_APRIL_ASR=1 pdoc3 ../bindings/python/april_asr --skip-errors --html --force -o src
          sed -i 's/april_asr\._april/april_asr/g' src/april_asr/*.html

      - name: Install mdbook
        run: |
          mkdir mdbook
          curl -sSL https://github.com/rust-lang/mdBook/releases/download/v0.4.27/mdbook-v0.4.27-x86_64-unknown-linux-gnu.tar.gz | tar -xz --directory=./mdbook
          echo `pwd`/mdbook >> $GITHUB_PATH

      - name: Deploy GitHub Pages
        run: |
          # This assumes your book is in the root of your repository.
          # Just add a `cd` here if you need to change to another directory.
          cd april-docs
          mdbook build
          git worktree add gh-pages
          git config user.name "Deploy from CI"
          git config user.email ""
          cd gh-pages
          # Delete the ref to avoid keeping history.
          git update-ref -d refs/heads/gh-pages
          rm -rf *
          mv ../book/* .
          git add .
          git commit -m "Deploy $GITHUB_SHA to gh-pages"
          git push --force --set-upstream origin gh-pages
```

`.github/workflows/main.ci.yml`:

```yaml
name: AprilASR CI

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build:
    name: Build library
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-20.04, windows-latest]

    runs-on: ${{ matrix.os }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - if: matrix.os == 'ubuntu-20.04'
        name: Install apt packages
        uses: awalsh128/cache-apt-pkgs-action@v1.4.3
        with:
          packages: build-essential cmake wget tar

      - if: matrix.os == 'ubuntu-20.04'
        name: Download ONNXRuntime (Linux)
        shell: bash
        working-directory: ${{github.workspace}}
        run: ./download_onnx_linux_x64.sh

      - if: matrix.os == 'windows-latest'
        name: Download ONNXRuntime (Windows)
        shell: bash
        working-directory: ${{github.workspace}}
        run: ./download_onnx_windows_x64.sh

      - name: Create Build Environment
        run: cmake -E make_directory ${{github.workspace}}/build

      - if: matrix.os == 'windows-latest'
        name: Configure CMake (Windows)
        shell: bash
        working-directory: ${{github.workspace}}/build
        run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DUSE_TINYCTHREAD=ON

      - if: matrix.os == 'ubuntu-20.04'
        name: Configure CMake (Linux)
        shell: bash
        working-directory: ${{github.workspace}}/build
        run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON

      - name: Build libaprilasr
        working-directory: ${{github.workspace}}/build
        shell: bash
        run: cmake --build . --config RELEASE

      - name: Ensure wheel and auditwheel are installed
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel auditwheel

      - if: matrix.os == 'ubuntu-20.04'
        name: Update patchelf on Linux
        run: python3 -m pip install --upgrade patchelf

      - name: Build Python whl
        working-directory: ${{github.workspace}}/bindings/python
        shell: bash
        run: python3 setup.py bdist_wheel

      - if: matrix.os == 'ubuntu-20.04'
        name: Fix Linux wheel
        working-directory: ${{github.workspace}}/bindings/python
        shell: bash
        run: mkdir wheelhouse && auditwheel repair dist/april_asr-*-py3-none-linux_x86_64.whl --plat manylinux_2_31_x86_64

      - if: matrix.os == 'windows-latest'
        name: Upload libaprilasr.dll
        uses: actions/upload-artifact@v4
        with:
          name: libaprilasr.dll
          path: ${{github.workspace}}/build/Release/libaprilasr.dll

      - if: matrix.os == 'windows-latest'
        name: Upload onnxruntime.dll
        uses: actions/upload-artifact@v4
        with:
          name: onnxruntime.dll
          path: ${{github.workspace}}/lib/lib/onnxruntime.dll

      - if: matrix.os == 'windows-latest'
        name: Upload Python wheel (Windows)
        uses: actions/upload-artifact@v4
        with:
          name: windows-python-dist
          path: ${{github.workspace}}/bindings/python/dist

      - if: matrix.os == 'ubuntu-20.04'
        name: Upload libaprilasr.so
        uses: actions/upload-artifact@v4
        with:
          name: libaprilasr.so
          path: ${{github.workspace}}/build/libaprilasr.so

      - if: matrix.os == 'ubuntu-20.04'
        name: Upload libonnxruntime.so
        uses: actions/upload-artifact@v4
        with:
          name: libonnxruntime.so
          path: ${{github.workspace}}/lib/lib/libonnxruntime.so

      - if: matrix.os == 'ubuntu-20.04'
        name: Upload Python wheel (Linux)
        uses: actions/upload-artifact@v4
        with:
          name: ubuntu-python-dist
          path: ${{github.workspace}}/bindings/python/wheelhouse

  nupkg:
    name: Build nupkg
    needs: build
    runs-on: ubuntu-20.04

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install apt packages
        uses: awalsh128/cache-apt-pkgs-action@v1.4.3
        with:
          packages: mono-mcs nuget

      - name: Ensure nuget is installed
        run: sudo apt-get install -y nuget
        working-directory: ${{github.workspace}}

      - name: Download Linux aprilasr
        uses: actions/download-artifact@v4
        with:
          name: libaprilasr.so
          path: ${{github.workspace}}/bindings/csharp/nuget/build/lib/linux-x64/

      - name: Download Linux onnxruntime
        uses: actions/download-artifact@v4
        with:
          name: libonnxruntime.so
          path: ${{github.workspace}}/bindings/csharp/nuget/build/lib/linux-x64/

      - name: Download Windows aprilasr
        uses: actions/download-artifact@v4
        with:
          name: libaprilasr.dll
          path: ${{github.workspace}}/bindings/csharp/nuget/build/lib/win-x64/

      - name: Download Windows onnxruntime
        uses: actions/download-artifact@v4
        with:
          name: onnxruntime.dll
          path: ${{github.workspace}}/bindings/csharp/nuget/build/lib/win-x64/

      - name: Display structure of downloaded files
        run: ls -R
        working-directory: ${{github.workspace}}/bindings/csharp/nuget/build/lib

      - name: Build C# bindings
        working-directory: ${{github.workspace}}/bindings/csharp/nuget
        run: mcs -out:lib/netstandard2.0/AprilAsr.dll -target:library src/*.cs

      - name: Run nuget pack
        working-directory: ${{github.workspace}}/bindings/csharp/nuget
        run: mkdir out && nuget pack -Verbosity detailed -OutputDirectory out

      - name: Upload nupkg
        uses: actions/upload-artifact@v4
        with:
          name: nupkg
          path: ${{github.workspace}}/bindings/csharp/nuget/out
```

`.gitignore`:

```
.cache
.vs
/build
/build2
/lib
**/__pycache__
*.so
*.egg-info
*.lib
*.pdb
*.dll
*.exp
*.exe
*.nupkg
bindings/csharp/AprilAsrDemo/bin
bindings/csharp/AprilAsrDemo/obj
bindings/python/build
bindings/python/dist
april-docs/src/csharp/html
april-docs/src/april_asr
/result
/result-dev
```

`.vscode/launch.json`:

```json
{
    "version": "0.2.0",
    "configurations": [
        {
            "type": "lldb",
            "request": "launch",
            "name": "Launch",
            "env": {"LD_LIBRARY_PATH": "${workspaceFolder}/lib/lib", "APRIL_LOG_LEVEL": "DEBUG"},
            "program": "${workspaceFolder}/build/main",
            "args": ["${workspaceFolder}/build/test.wav", "${workspaceFolder}/build/aprilv0_en-us.april"],
            "cwd": "${workspaceFolder}"
        }
    ]
}
```
# Module april_asr

april_asr provides Python bindings for the aprilasr library.

aprilasr provides an API for offline streaming speech-to-text applications, and enables low-latency on-device realtime speech recognition for live captioning or other speech recognition use cases.

Sub-modules: april_asr.example
## class Model(path: str)

```python
class Model:
    """
    Models end with the file extension `.april`. You need to pass a path to
    such a file to construct a Model type.

    Each model has its own sample rate at which it expects audio. There is a
    method to get the expected sample rate. Usually, this is 16000 Hz.

    Models also have additional metadata such as name, description, and
    language.

    After loading a model, you can create one or more sessions that use the
    model.
    """
    def __init__(self, path: str):
        self._handle = _c.ffi.aam_create_model(path)

        if self._handle is None:
            raise Exception("Failed to load model")

    def get_name(self) -> str:
        """Get the name from the model's metadata"""
        return _c.ffi.aam_get_name(self._handle)

    def get_description(self) -> str:
        """Get the description from the model's metadata"""
        return _c.ffi.aam_get_description(self._handle)

    def get_language(self) -> str:
        """Get the language from the model's metadata"""
        return _c.ffi.aam_get_language(self._handle)

    def get_sample_rate(self) -> int:
        """Get the sample rate from the model's metadata"""
        return _c.ffi.aam_get_sample_rate(self._handle)

    def __del__(self):
        _c.ffi.aam_free(self._handle)
        self._handle = None
```
Models end with the file extension `.april`. You need to pass a path to such a file to construct a Model type.

Each model has its own sample rate at which it expects audio. There is a method to get the expected sample rate. Usually, this is 16000 Hz.

Models also have additional metadata such as name, description, and language.

After loading a model, you can create one or more sessions that use the model.
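For instance, here is a minimal sketch of loading a model and reading its metadata (the model file name is a hypothetical example; use the path to a real `.april` model):

```python
from april_asr import Model

model = Model("aprilv0_en-us.april")  # hypothetical model path

print("Name:", model.get_name())
print("Description:", model.get_description())
print("Language:", model.get_language())
print("Sample rate:", model.get_sample_rate())  # usually 16000 Hz
```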
### Methods

def get_description(self) -> str

```python
def get_description(self) -> str:
    """Get the description from the model's metadata"""
    return _c.ffi.aam_get_description(self._handle)
```

Get the description from the model's metadata.

def get_language(self) -> str

```python
def get_language(self) -> str:
    """Get the language from the model's metadata"""
    return _c.ffi.aam_get_language(self._handle)
```

Get the language from the model's metadata.

def get_name(self) -> str

```python
def get_name(self) -> str:
    """Get the name from the model's metadata"""
    return _c.ffi.aam_get_name(self._handle)
```

Get the name from the model's metadata.

def get_sample_rate(self) -> int

```python
def get_sample_rate(self) -> int:
    """Get the sample rate from the model's metadata"""
    return _c.ffi.aam_get_sample_rate(self._handle)
```

Get the sample rate from the model's metadata.
## class Result

```python
class Result(IntEnum):
    """
    Result type that is passed to your handler
    """

    PARTIAL_RECOGNITION = 1
    """A partial recognition. The next handler call will contain an updated
    list of tokens."""

    FINAL_RECOGNITION = 2
    """A final recognition. The next handler call will start from an empty
    token list."""

    ERROR_CANT_KEEP_UP = 3
    """In an asynchronous session, this may be called when the system can't
    keep up with the incoming audio, and samples have been dropped. The
    accuracy will be affected. An empty token list is given."""

    SILENCE = 4
    """Called after some silence. An empty token list is given."""
```

Result type that is passed to your handler.
### Class variables

var ERROR_CANT_KEEP_UP
: In an asynchronous session, this may be called when the system can't keep up with the incoming audio, and samples have been dropped. The accuracy will be affected. An empty token list is given.

var FINAL_RECOGNITION
: A final recognition. The next handler call will start from an empty token list.

var PARTIAL_RECOGNITION
: A partial recognition. The next handler call will contain an updated list of tokens.

var SILENCE
: Called after some silence. An empty token list is given.
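As a sketch, a handler might branch on the result kind like this (the printing format mirrors the C# example later in these docs):

```python
from typing import List

from april_asr import Result, Token

def handler(result: Result, tokens: List[Token]) -> None:
    # Tokens carry their own spacing, so plain concatenation is enough
    text = "".join(t.token for t in tokens)

    if result == Result.PARTIAL_RECOGNITION:
        print("- " + text)
    elif result == Result.FINAL_RECOGNITION:
        print("@ " + text)
    else:
        # ERROR_CANT_KEEP_UP and SILENCE arrive with an empty token list
        print(text)
```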
## class Session(model: Model, callback: Callable[[Result, List[Token]], None], asynchronous: bool = False, no_rt: bool = False, speaker_name: str = '')
```python
class Session:
    """
    The session is what performs the actual speech recognition. It has
    methods to input audio, and it calls your given handler with decoded
    results.

    You need to pass a Model when constructing a Session.
    """
    def __init__(self,
                 model: Model,
                 callback: Callable[[Result, List[Token]], None],
                 asynchronous: bool = False,
                 no_rt: bool = False,
                 speaker_name: str = ""
                 ):
        config = _c.AprilConfig()
        config.flags = _c.AprilConfigFlagBits()

        if asynchronous and no_rt:
            config.flags.value = 2
        elif asynchronous:
            config.flags.value = 1
        else:
            config.flags.value = 0

        if speaker_name != "":
            spkr_data = struct.pack("@q", hash(speaker_name)) * 2
            config.speaker = _c.AprilSpeakerID.from_buffer_copy(spkr_data)

        config.handler = _HANDLER
        config.userdata = id(self)

        self.model = model
        self._handle = _c.ffi.aas_create_session(model._handle, config)
        if self._handle is None:
            raise Exception("Failed to create session")

        self.callback = callback

    def get_rt_speedup(self) -> float:
        """
        If the session is asynchronous and realtime, this will return a
        positive float. A value below 1.0 means the session is keeping up, and
        a value greater than 1.0 means the input audio is being sped up by
        that factor in order to keep up. When the value is greater than 1.0,
        the accuracy is likely to be affected.
        """
        return _c.ffi.aas_realtime_get_speedup(self._handle)

    def feed_pcm16(self, data: bytes) -> None:
        """
        Feed the given pcm16 samples in bytes to the session. If the session
        is asynchronous, this will return immediately and queue the data for
        the background thread to process. If the session is not asynchronous,
        this will block your thread and potentially call the handler before
        returning.
        """
        _c.ffi.aas_feed_pcm16(self._handle, data)

    def flush(self) -> None:
        """
        Flush any remaining samples and force the session to produce a final
        result.
        """
        _c.ffi.aas_flush(self._handle)

    def __del__(self):
        _c.ffi.aas_free(self._handle)
        self.model = None
        self._handle = None
```
The session is what performs the actual speech recognition. It has methods to input audio, and it calls your given handler with decoded results.

You need to pass a Model when constructing a Session.
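Putting the pieces together, a minimal synchronous sketch might look like this (the file names are hypothetical, and the wav file is assumed to be mono PCM16 at the model's sample rate):

```python
import wave

from april_asr import Model, Session

model = Model("aprilv0_en-us.april")  # hypothetical model path

def handler(result, tokens):
    print("".join(t.token for t in tokens))

session = Session(model, handler)

# Assumed: mono, 16-bit PCM wav at the model's sample rate
with wave.open("audio.wav", "rb") as wav:
    session.feed_pcm16(wav.readframes(wav.getnframes()))

session.flush()
```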
### Methods

def feed_pcm16(self, data: bytes) -> None

```python
def feed_pcm16(self, data: bytes) -> None:
    """
    Feed the given pcm16 samples in bytes to the session. If the session
    is asynchronous, this will return immediately and queue the data for
    the background thread to process. If the session is not asynchronous,
    this will block your thread and potentially call the handler before
    returning.
    """
    _c.ffi.aas_feed_pcm16(self._handle, data)
```

Feed the given pcm16 samples in bytes to the session. If the session is asynchronous, this will return immediately and queue the data for the background thread to process. If the session is not asynchronous, this will block your thread and potentially call the handler before returning.

def flush(self) -> None

```python
def flush(self) -> None:
    """
    Flush any remaining samples and force the session to produce a final
    result.
    """
    _c.ffi.aas_flush(self._handle)
```

Flush any remaining samples and force the session to produce a final result.

def get_rt_speedup(self) -> float

```python
def get_rt_speedup(self) -> float:
    """
    If the session is asynchronous and realtime, this will return a
    positive float. A value below 1.0 means the session is keeping up, and
    a value greater than 1.0 means the input audio is being sped up by
    that factor in order to keep up. When the value is greater than 1.0,
    the accuracy is likely to be affected.
    """
    return _c.ffi.aas_realtime_get_speedup(self._handle)
```

If the session is asynchronous and realtime, this will return a positive float. A value below 1.0 means the session is keeping up, and a value greater than 1.0 means the input audio is being sped up by that factor in order to keep up. When the value is greater than 1.0, the accuracy is likely to be affected.
## class Token(token)

```python
class Token:
    """
    A token may be a single letter, a word chunk, an entire word,
    punctuation, or another arbitrary set of characters.

    To convert a token array to a string, simply concatenate the strings
    from each token. You don't need to add spaces between tokens; the
    tokens contain their own formatting.

    Tokens also contain the log probability, and a boolean denoting
    whether or not the token is a word boundary. In English, the word
    boundary value is equivalent to checking if the first character is a
    space.
    """

    token: str = ""
    logprob: float = 0.0
    word_boundary: bool = False
    sentence_end: bool = False
    time: float = 0.0

    def __init__(self, token):
        self.token = token.token.decode("utf-8")
        self.logprob = token.logprob
        self.word_boundary = (token.flags.value & 1) != 0
        self.sentence_end = (token.flags.value & 2) != 0
        self.time = float(token.time_ms) / 1000.0
```

A token may be a single letter, a word chunk, an entire word, punctuation, or another arbitrary set of characters.

To convert a token array to a string, simply concatenate the strings from each token. You don't need to add spaces between tokens; the tokens contain their own formatting.

Tokens also contain the log probability, and a boolean denoting whether or not the token is a word boundary. In English, the word boundary value is equivalent to checking if the first character is a space.

### Instance variables
var logprob : float
var sentence_end : bool
var time : float
var token : str
var word_boundary : bool
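As an illustrative sketch, these fields can be used to recover text and rough word timestamps (`tokens` stands in for the list passed to your handler):

```python
# `tokens` is assumed to be the List[Token] given to your handler
text = "".join(t.token for t in tokens).strip()

for t in tokens:
    if t.word_boundary:
        # time is in seconds; logprob is the token's log probability
        print(f"{t.time:6.2f}s {t.token.strip()} (logprob {t.logprob:.2f})")
```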
# General Concepts

Before writing any code, it's recommended to understand these concepts. They apply to all of the language bindings.
## Models

Models end with the file extension `.april`. You can load these files using the AprilASR API.

Each model has its own sample rate at which it expects audio. There is a method to get the expected sample rate. Usually, this is 16000 Hz.

Models also have additional metadata such as name, description, and language.

After loading a model, you can create one or more sessions that use the model. In the most common case, you will have one session based on one model.
## Sessions

The session is what performs the actual speech recognition. It has methods to input audio, and it calls your given handler with decoded results.

To perform speech-to-text, feed PCM16 audio of the speech to the session through the `feed_pcm16` method (or its equivalent in your language). Make sure the audio is mono and at the model's expected sample rate.

PCM16 means an array of shorts with values between -32768 and 32767, each one describing a single sample.
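For example, here is a hedged Python sketch of converting float samples in [-1.0, 1.0] to PCM16 bytes (the `samples` list is a stand-in for your own audio source):

```python
import struct

# Hypothetical float samples in the range [-1.0, 1.0]
samples = [0.0, 0.25, -0.5, 1.0]

# Clamp and scale each sample to a signed 16-bit little-endian integer
pcm16 = struct.pack(
    "<%dh" % len(samples),
    *(max(-32768, min(32767, int(s * 32767))) for s in samples),
)
# `pcm16` can now be passed to a session's feed_pcm16
```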
After calling `feed_pcm16`, the session will run the neural network and call your specified handler with a result. You can present this result to the user or process it however you like.

In more advanced use cases, you may have multiple sessions performing recognition on separate audio streams. When doing this, you can re-use the same model to minimize memory use.
## Synchronous and asynchronous sessions

The simplest (and default) mode of operation is the synchronous session.

In a synchronous session, when you call the function to feed audio, it processes the audio synchronously, calls the handler if a new result is decoded, and returns once it's done.

This means that calls to feed audio can be very slow, because they block while the network runs. This may be undesirable in some cases, such as a live captioning situation. For this reason, you can choose to construct asynchronous sessions instead.
An asynchronous session does not perform calculations on the calling thread.

Calls to feed audio are quick: the session copies the data and triggers a second thread to do the actual calculations. The second thread calls the handler at some point, when processing is done.

A caveat is that you must feed audio at a rate of one second of audio per second of wall-clock time. You should not feed multiple seconds or minutes at once; the internal buffer cannot fit more than a few seconds.

Asynchronous sessions are intended for streaming audio as it comes in, for example for live captioning. If you feed more than one second of audio every second, you will get poor results (if any).
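If you are reading from a file rather than a live source, you can approximate realtime pacing by sleeping between chunks, much like the C# example later in these docs. A hedged Python sketch (file names and chunk size are illustrative):

```python
import time
import wave

from april_asr import Model, Session

model = Model("aprilv0_en-us.april")  # hypothetical model path
session = Session(
    model,
    lambda result, tokens: print("".join(t.token for t in tokens)),
    asynchronous=True,
)

# Assumed: mono, 16-bit PCM wav at the model's sample rate
with wave.open("audio.wav", "rb") as wav:
    while True:
        data = wav.readframes(2400)
        if not data:
            break
        session.feed_pcm16(data)
        # Two bytes per sample: sleep so we feed ~1 second of audio per second
        time.sleep(len(data) / 2 / model.get_sample_rate())

session.flush()
```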
## Realtime and non-realtime sessions

In an asynchronous session, there is a problem: the system may not be fast enough to process audio at the rate it's coming in. This is where realtime and non-realtime sessions differ in behavior.

A realtime session works around this by automatically speeding up incoming audio to a rate the system can keep up with. This involves some audio processing code, which may or may not be desirable.

Speeding up audio may reduce accuracy. The impact may be minor at small values (such as 1.2x), but at larger values (such as over 2.0x) the accuracy may be severely affected. There is a method you can call to get the current speedup value, so you know when this is happening and can, for example, display a warning to the user.
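In the Python bindings that method is `get_rt_speedup`; a short sketch of polling it (`session` is assumed to be an asynchronous, realtime session):

```python
# `session` is assumed to be an asynchronous, realtime Session
speedup = session.get_rt_speedup()
if speedup > 1.2:
    print(f"Warning: audio is being sped up {speedup:.2f}x; accuracy may suffer")
```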
A non-realtime session ignores this problem and assumes the system is fast enough. If this is not the case, the results will fall behind, the internal buffer will fill up, the handler will be called with the ErrorCantKeepUp result, and the output will be disastrously bad.
## The callback

The results are given via a callback (handler). It gets called by the session whenever it has new results. The parameters given to the callback include the result type and the token array.

Note that in an asynchronous session, the handler will be called from a different thread. Be sure to expect this and write thread-safe code, or use a synchronous session.

You should try to make your handler function fast, to avoid slowing down the session.

The actual text can be extracted from the token array.
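One common pattern for keeping the handler fast and thread-safe (a sketch, not part of the AprilASR API) is to push results onto a queue and consume them on your main thread:

```python
import queue

results = queue.Queue()

def handler(result, tokens):
    # Called from the session's background thread; keep this cheap
    results.put((result, "".join(t.token for t in tokens)))

# ...create the asynchronous Session with `handler`, then on the main thread:
# kind, text = results.get()
```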
## Result types

The handler gets called with an enum explaining the result type:

- PartialRecognition: a partial result; the next call will contain an updated list of tokens.
- FinalRecognition: a final result; the next call will start from an empty token list.
- ErrorCantKeepUp: samples have been dropped because the system can't keep up; an empty token list is given.
- Silence: called after some silence; an empty token list is given.
## Tokens

A token may be a single letter, a word chunk, an entire word, punctuation, or another arbitrary set of characters.

To convert a token array to a string, simply concatenate the strings from each token. You don't need to add spaces between tokens; the tokens contain their own formatting.

For example, the text "THAT'S COOL ELEPHANTS" may be represented as tokens like so:

[" THAT", "'", "S", " CO", "OL", " E", "LE", "P", "H", "ANT", "S"]

Concatenating these gives " THAT'S COOL ELEPHANTS", but with an extra space at the beginning. You may want to strip the final string to avoid the extra space.

Tokens contain more data than just the string, however. They also contain the log probability, and a boolean denoting whether or not the token is a word boundary. In English, the word boundary value is equivalent to checking if the first character is a space.
# C# Bindings

AprilASR depends on ONNXRuntime for ML inference. You will need both libraries for it to work:

- Linux: libaprilasr.so and libonnxruntime.so
- Windows: libaprilasr.dll and onnxruntime.dll

Install the NuGet package from https://www.nuget.org/packages/AprilAsr

To get started, import AprilAsr:

```csharp
using AprilAsr;
```
You can load a model like so:

```csharp
string modelPath = "/path/to/model.april";
AprilModel model = new AprilModel(modelPath);
```

Models have a few metadata fields:

```csharp
string name = model.Name;
string description = model.Description;
string language = model.Language;
int sampleRate = model.SampleRate;
```
A session needs a callback. You can define one inline; this example concatenates the tokens into a string and prints it:

```csharp
AprilSession session = new AprilSession(model, (result, tokens) => {
    if (tokens == null) return;

    string s = "";
    if (result == AprilResultKind.PartialRecognition) {
        s = "- ";
    } else if (result == AprilResultKind.FinalRecognition) {
        s = "@ ";
    } else {
        s = "  ";
    }

    foreach (AprilToken token in tokens) {
        s += token.Token;
    }

    Console.WriteLine(s);
});
```
There are more options when it comes to creating a session; here is the initializer signature:

```csharp
public AprilSession(AprilModel model, SessionCallback callback, bool async = false, bool noRT = false, string speakerName = "")
```

Refer to the General Concepts page for an explanation of the asynchronous, non-realtime, and speaker name options.
Most of the examples use a very simple method like this to load and feed audio:

```csharp
// Read the file data (assumes the wav file is 16-bit PCM)
var fileData = File.ReadAllBytes(wavFilePath);
short[] shorts = new short[fileData.Length / 2];
Buffer.BlockCopy(fileData, 0, shorts, 0, fileData.Length);

// Feed the data
session.FeedPCM16(shorts, shorts.Length);
```

This works only if the wav file is PCM16 and sampled at the correct sample rate. If you attempt to load an mp3, a non-PCM16 or non-16kHz wav file, or any other audio file this way, you will likely get gibberish or no results.
Asynchronous sessions are a little more complicated. You can create one by setting the asynchronous flag to true:

```csharp
AprilSession session = new AprilSession(..., async: true);
```

Now, when feeding audio, be sure to feed it in realtime:

```csharp
var fileData = File.ReadAllBytes(wavFilePath);
short[] shorts = new short[2400];

for (int i = 0; i < (fileData.Length / 2); i += shorts.Length) {
    int size = Math.Min(shorts.Length, (fileData.Length / 2) - i);
    Buffer.BlockCopy(fileData, i * 2, shorts, 0, size * 2);
    session.FeedPCM16(shorts, size);
    Thread.Sleep(size * 1000 / model.SampleRate);
}

session.Flush();
```
Here is the full example:

```csharp
using AprilAsr;

var modelPath = "aprilv0_en-us.april";
var wavFilePath = "audio.wav";

// Load the model and print metadata
var model = new AprilModel(modelPath);
Console.WriteLine("Name: " + model.Name);
Console.WriteLine("Description: " + model.Description);
Console.WriteLine("Language: " + model.Language);

// Create the session with an inline callback
var session = new AprilSession(model, (result, tokens) => {
    if (tokens == null) return;

    string s = "";
    if (result == AprilResultKind.PartialRecognition) {
        s = "- ";
    } else if (result == AprilResultKind.FinalRecognition) {
        s = "@ ";
    } else {
        s = "  ";
    }

    foreach (AprilToken token in tokens) {
        s += token.Token;
    }

    Console.WriteLine(s);
});

// Read the file data (assumes the wav file is 16-bit PCM)
var fileData = File.ReadAllBytes(wavFilePath);
short[] shorts = new short[fileData.Length / 2];
Buffer.BlockCopy(fileData, 0, shorts, 0, fileData.Length);

// Feed the data and flush
session.FeedPCM16(shorts, shorts.Length);
session.Flush();
```
Congratulations! You have just performed speech recognition using AprilAsr!