Skip to content

Commit df4615c

Browse files
authored
Add C/CXX/JavaScript API for NeMo Canary models (#2357)
This PR introduces support for NeMo Canary models across C, C++, and JavaScript APIs by adding new Canary configuration structures, updating bindings, extending examples, and enhancing CI workflows. - Add OfflineCanaryModelConfig to all language bindings (C, C++, JS, ETS). - Implement SetConfig methods and NAPI wrappers for updating recognizer config at runtime. - Update examples and CI scripts to demonstrate and test NeMo Canary model usage.
1 parent 0e738c3 commit df4615c

File tree

28 files changed

+736
-66
lines changed

28 files changed

+736
-66
lines changed

.github/scripts/test-nodejs-addon-npm.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@ arch=$(node -p "require('os').arch()")
1010
platform=$(node -p "require('os').platform()")
1111
node_version=$(node -p "process.versions.node.split('.')[0]")
1212

13+
echo "----------non-streaming ASR NeMo Canary----------"
14+
15+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
16+
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
17+
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
18+
19+
node ./test_asr_non_streaming_nemo_canary.js
20+
21+
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
22+
1323
echo "----------non-streaming ASR Zipformer CTC----------"
1424
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
1525

.github/scripts/test-nodejs-npm.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@ git status
99
ls -lh
1010
ls -lh node_modules
1111

12+
# asr with offline nemo canary
13+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
14+
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
15+
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
16+
17+
node ./test-offline-nemo-canary.js
18+
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
19+
1220
# asr with offline zipformer ctc
1321
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
1422

.github/scripts/test-online-ctc.sh

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,6 @@ time $EXE \
7777
$repo/test_wavs/DEV_T0000000001.wav \
7878
$repo/test_wavs/DEV_T0000000002.wav
7979

80-
log "test int8"
81-
82-
time $EXE \
83-
--debug=1 \
84-
--zipformer2-ctc-model=$repo/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
85-
--tokens=$repo/tokens.txt \
86-
$repo/test_wavs/DEV_T0000000000.wav \
87-
$repo/test_wavs/DEV_T0000000001.wav \
88-
$repo/test_wavs/DEV_T0000000002.wav
89-
9080
rm -rf $repo
9181

9282
log "------------------------------------------------------------"

.github/workflows/c-api.yaml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,36 @@ jobs:
127127
rm -rf dict lexicon.txt test-hr.wav replace.fst
128128
rm -v $name
129129
130+
- name: Test NeMo Canary
131+
shell: bash
132+
run: |
133+
name=nemo-canary-c-api
134+
gcc -o $name ./c-api-examples/$name.c \
135+
-I ./build/install/include \
136+
-L ./build/install/lib/ \
137+
-l sherpa-onnx-c-api \
138+
-l onnxruntime
139+
140+
ls -lh $name
141+
142+
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
143+
ldd ./$name
144+
echo "----"
145+
readelf -d ./$name
146+
fi
147+
148+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
149+
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
150+
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
151+
152+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
153+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
154+
155+
./$name
156+
157+
rm $name
158+
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
159+
130160
- name: Test Dolphin CTC
131161
shell: bash
132162
run: |

.github/workflows/cxx-api.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,40 @@ jobs:
8787
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
8888
fi
8989
90+
- name: Test NeMo Canary
91+
shell: bash
92+
run: |
93+
name=nemo-canary-cxx-api
94+
g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
95+
-I ./build/install/include \
96+
-L ./build/install/lib/ \
97+
-l sherpa-onnx-cxx-api \
98+
-l sherpa-onnx-c-api \
99+
-l onnxruntime
100+
101+
ls -lh $name
102+
103+
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
104+
ldd ./$name
105+
echo "----"
106+
readelf -d ./$name
107+
fi
108+
109+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
110+
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
111+
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
112+
113+
ls -lh sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
114+
echo "---"
115+
116+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
117+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
118+
119+
./$name
120+
121+
rm -rf sherpa-onnx-nemo-canary-*
122+
rm -v ./$name
123+
90124
- name: Test streaming zipformer with Homophone replacer
91125
shell: bash
92126
run: |

c-api-examples/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api)
5353
add_executable(fire-red-asr-c-api fire-red-asr-c-api.c)
5454
target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api)
5555

56+
add_executable(nemo-canary-c-api nemo-canary-c-api.c)
57+
target_link_libraries(nemo-canary-c-api sherpa-onnx-c-api)
58+
5659
add_executable(sense-voice-c-api sense-voice-c-api.c)
5760
target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)
5861

c-api-examples/nemo-canary-c-api.c

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
// c-api-examples/nemo-canary-c-api.c
2+
//
3+
// Copyright (c) 2025 Xiaomi Corporation
4+
5+
// We assume you have pre-downloaded the NeMo Canary model
6+
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
7+
// An example is given below:
8+
//
9+
// clang-format off
10+
//
11+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
12+
// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
13+
// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
14+
//
15+
// clang-format on
16+
//
17+
// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
18+
// for details
19+
20+
#include <stdio.h>
21+
#include <stdlib.h>
22+
#include <string.h>
23+
24+
#include "sherpa-onnx/c-api/c-api.h"
25+
26+
// Example entry point: decodes one German test wave with a NeMo Canary model
// twice using the same recognizer, first with tgt_lang "en" and then, after
// updating the recognizer config, with tgt_lang "de". Both results are printed
// to stderr. Returns 0 on success, or -1 if the wave file cannot be read or
// the recognizer cannot be created.
int32_t main() {
27+
const char *wav_filename =
28+
"./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";
29+
const char *encoder_filename =
30+
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
31+
const char *decoder_filename =
32+
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
33+
const char *tokens_filename =
34+
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
35+
const char *provider = "cpu";
36+
37+
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
38+
if (wave == NULL) {
39+
fprintf(stderr, "Failed to read %s\n", wav_filename);
40+
return -1;
41+
}
42+
43+
// Offline model config
44+
SherpaOnnxOfflineModelConfig offline_model_config;
45+
// zero every field first so that anything not assigned below is 0/NULL
memset(&offline_model_config, 0, sizeof(offline_model_config));
46+
47+
// set debug to 1 to view more logs
48+
offline_model_config.debug = 0;
49+
50+
offline_model_config.num_threads = 1;
51+
offline_model_config.provider = provider;
52+
offline_model_config.tokens = tokens_filename;
53+
offline_model_config.canary.encoder = encoder_filename;
54+
offline_model_config.canary.decoder = decoder_filename;
55+
56+
// enable use_pnc so that the output includes punctuation and casing
57+
offline_model_config.canary.use_pnc = 1;
58+
59+
offline_model_config.canary.src_lang = "de";
60+
61+
// the input audio is German, so tgt_lang can be set to either en or de
62+
offline_model_config.canary.tgt_lang = "en";
63+
64+
// Recognizer config
65+
SherpaOnnxOfflineRecognizerConfig recognizer_config;
66+
memset(&recognizer_config, 0, sizeof(recognizer_config));
67+
recognizer_config.decoding_method = "greedy_search";
68+
// struct copy: later edits must go through recognizer_config.model_config,
// not offline_model_config
recognizer_config.model_config = offline_model_config;
69+
70+
const SherpaOnnxOfflineRecognizer *recognizer =
71+
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
72+
73+
if (recognizer == NULL) {
74+
fprintf(stderr, "Please check your config!\n");
75+
76+
// release the wave loaded above before bailing out
SherpaOnnxFreeWave(wave);
77+
78+
return -1;
79+
}
80+
81+
// NOTE(review): stream and result below are used without NULL checks;
// confirm the C API cannot return NULL for a valid recognizer.
const SherpaOnnxOfflineStream *stream =
82+
SherpaOnnxCreateOfflineStream(recognizer);
83+
84+
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
85+
wave->num_samples);
86+
SherpaOnnxDecodeOfflineStream(recognizer, stream);
87+
const SherpaOnnxOfflineRecognizerResult *result =
88+
SherpaOnnxGetOfflineStreamResult(stream);
89+
90+
fprintf(stderr, "Decoded text (English): %s\n", result->text);
91+
92+
SherpaOnnxDestroyOfflineRecognizerResult(result);
93+
SherpaOnnxDestroyOfflineStream(stream);
94+
95+
// now output German text: change tgt_lang and push the updated config to
// the existing recognizer instead of creating a new one
96+
recognizer_config.model_config.canary.tgt_lang = "de";
97+
SherpaOnnxOfflineRecognizerSetConfig(recognizer, &recognizer_config);
98+
99+
// a fresh stream is created for the second pass over the same samples
stream = SherpaOnnxCreateOfflineStream(recognizer);
100+
101+
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
102+
wave->num_samples);
103+
SherpaOnnxDecodeOfflineStream(recognizer, stream);
104+
result = SherpaOnnxGetOfflineStreamResult(stream);
105+
106+
fprintf(stderr, "Decoded text (German): %s\n", result->text);
107+
108+
SherpaOnnxDestroyOfflineRecognizerResult(result);
109+
SherpaOnnxDestroyOfflineStream(stream);
110+
111+
SherpaOnnxDestroyOfflineRecognizer(recognizer);
112+
SherpaOnnxFreeWave(wave);
113+
114+
return 0;
115+
}

c-api-examples/streaming-ctc-buffered-tokens-c-api.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ int32_t main() {
5454
"DEV_T0000000000.wav";
5555
const char *model_filename =
5656
"sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/"
57-
"ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx";
57+
"ctc-epoch-20-avg-1-chunk-16-left-128.onnx";
5858
const char *tokens_filename =
5959
"sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt";
6060
const char *provider = "cpu";

cxx-api-examples/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
2727
add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
2828
target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
2929

30+
add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc)
31+
target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api)
32+
3033
if(SHERPA_ONNX_ENABLE_PORTAUDIO)
3134
add_executable(sense-voice-simulate-streaming-microphone-cxx-api
3235
./sense-voice-simulate-streaming-microphone-cxx-api.cc
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// cxx-api-examples/nemo-canary-cxx-api.cc
2+
//
3+
// Copyright (c) 2025 Xiaomi Corporation
4+
5+
//
6+
// This file demonstrates how to use NeMo Canary models with
7+
// sherpa-onnx's C++ API.
8+
//
9+
// clang-format off
10+
//
11+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
12+
// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
13+
// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
14+
//
15+
// clang-format on
16+
//
17+
// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
18+
// for details
19+
20+
#include <chrono> // NOLINT
21+
#include <iostream>
22+
#include <string>
23+
24+
#include "sherpa-onnx/c-api/cxx-api.h"
25+
26+
// Example entry point: loads a NeMo Canary model, decodes one German test
// wave with tgt_lang "en" while measuring elapsed time and real-time factor,
// then switches tgt_lang to "de" via SetConfig() and decodes the same samples
// again. Returns 0 on success, or -1 if the recognizer cannot be created or
// the wave file yields no samples.
int32_t main() {
27+
using namespace sherpa_onnx::cxx; // NOLINT
28+
OfflineRecognizerConfig config;
29+
30+
config.model_config.canary.encoder =
31+
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
32+
config.model_config.canary.decoder =
33+
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
34+
35+
// our input audio is German, so we set src_lang to "de"
36+
config.model_config.canary.src_lang = "de";
37+
38+
// we can set tgt_lang either to de or en in this specific case
39+
config.model_config.canary.tgt_lang = "en";
40+
config.model_config.tokens =
41+
"sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
42+
43+
config.model_config.num_threads = 1;
44+
45+
std::cout << "Loading model\n";
46+
OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
47+
// Get() is used here as the validity check for the created recognizer
if (!recognizer.Get()) {
48+
std::cerr << "Please check your config\n";
49+
return -1;
50+
}
51+
std::cout << "Loading model done\n";
52+
53+
std::string wave_filename =
54+
"./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";
55+
56+
Wave wave = ReadWave(wave_filename);
57+
// an empty sample buffer is treated as a read failure
if (wave.samples.empty()) {
58+
std::cerr << "Failed to read: '" << wave_filename << "'\n";
59+
return -1;
60+
}
61+
62+
std::cout << "Start recognition\n";
63+
// timed region: stream creation through GetResult()
const auto begin = std::chrono::steady_clock::now();
64+
65+
OfflineStream stream = recognizer.CreateStream();
66+
stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
67+
wave.samples.size());
68+
69+
recognizer.Decode(&stream);
70+
71+
OfflineRecognizerResult result = recognizer.GetResult(&stream);
72+
73+
const auto end = std::chrono::steady_clock::now();
74+
// elapsed time in seconds, truncated to millisecond resolution
const float elapsed_seconds =
75+
std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
76+
.count() /
77+
1000.;
78+
// audio length in seconds = number of samples / sample rate
float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
79+
// real-time factor: processing time divided by audio length
float rtf = elapsed_seconds / duration;
80+
81+
std::cout << "text (English): " << result.text << "\n";
82+
// NOTE(review): printf is used below, but <cstdio> is not among the includes
// visible in this file; it presumably arrives transitively via cxx-api.h -
// confirm, or include <cstdio> explicitly.
printf("Number of threads: %d\n", config.model_config.num_threads);
83+
printf("Duration: %.3fs\n", duration);
84+
printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
85+
printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
86+
duration, rtf);
87+
88+
// now output text in German: update tgt_lang and apply the changed config
// to the existing recognizer; this second pass is not timed
89+
config.model_config.canary.tgt_lang = "de";
90+
recognizer.SetConfig(config);
91+
stream = recognizer.CreateStream();
92+
stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
93+
wave.samples.size());
94+
95+
recognizer.Decode(&stream);
96+
97+
result = recognizer.GetResult(&stream);
98+
std::cout << "text (German): " << result.text << "\n";
99+
100+
return 0;
101+
}

0 commit comments

Comments
 (0)