Skip to content

Commit fd9a687

Browse files
authored
Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)
Add support for the new NeMo Canary ASR model across multiple language bindings by introducing a Canary model configuration and a SetConfig setter method on the offline recognizer.

- Define the Canary model config in Pascal, Go, C#, and Dart, and update the converter functions
- Add the SetConfig API for the offline recognizer (Pascal, Go, C#, Dart)
- Extend CI workflows and example scripts to test non-streaming Canary decoding
1 parent e2b2d5e commit fd9a687

File tree

27 files changed

+779
-8
lines changed

27 files changed

+779
-8
lines changed

.github/scripts/test-dot-net.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ cd ./version-test
66
./run.sh
77
ls -lh
88

9+
cd ../non-streaming-canary-decode-files
10+
./run.sh
11+
ls -lh
12+
rm -rf sherpa-onnx-nemo-*
13+
914
cd ../offline-decode-files
1015

1116
./run-zipformer-ctc.sh

.github/workflows/pascal.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ jobs:
156156
157157
pushd non-streaming-asr
158158
159+
./run-nemo-canary.sh
160+
rm -rf sherpa-onnx-*
161+
echo "---"
162+
159163
./run-zipformer-ctc.sh
160164
rm -rf sherpa-onnx-*
161165
echo "---"

.github/workflows/test-go-package.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,14 @@ jobs:
7676
run: |
7777
gcc --version
7878
79+
- name: Test NeMo Canary ASR
80+
if: matrix.os != 'windows-latest'
81+
shell: bash
82+
run: |
83+
cd go-api-examples/non-streaming-canary-decode-files
84+
./run.sh
85+
rm -rf sherpa-onnx-nemo-*
86+
7987
- name: Test speech enhancement (GTCRN)
8088
if: matrix.os != 'windows-latest'
8189
shell: bash

.github/workflows/test-go.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ jobs:
108108
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation
109109
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging
110110
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/
111+
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-canary-decode-files/
111112
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/
112113
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/
113114
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/
@@ -148,6 +149,19 @@ jobs:
148149
name: ${{ matrix.os }}-libs
149150
path: to-upload/
150151

152+
- name: Test non-streaming decoding files with NeMo Canary
153+
shell: bash
154+
run: |
155+
cd scripts/go/_internal/non-streaming-canary-decode-files/
156+
ls -lh
157+
go mod tidy
158+
cat go.mod
159+
go build
160+
ls -lh
161+
162+
./run.sh
163+
rm -rf sherpa-onnx-nemo-*
164+
151165
- name: Test streaming decoding files
152166
shell: bash
153167
run: |
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Copyright (c) 2025 Xiaomi Corporation
2+
import 'dart:io';
3+
4+
import 'package:args/args.dart';
5+
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
6+
7+
import './init.dart';
8+
9+
/// Transcribes a wave file with a NeMo Canary model.
///
/// Expects the options `--encoder`, `--decoder`, `--src-lang`, `--tgt-lang`,
/// `--tokens` and `--input-wav`. If any of them is missing, the usage text is
/// printed and the process exits with status 1.
///
/// The audio is first decoded with the requested target language. When that
/// language is not English, the recognizer is reconfigured in place via
/// `setConfig` and the same audio is decoded a second time into English.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the NeMo Canary encoder model')
    ..addOption('decoder', help: 'Path to the NeMo Canary decoder model')
    ..addOption('src-lang', help: 'Language of the input audio')
    ..addOption('tgt-lang', help: 'Language of the recognition result')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);

  // Every option is required; none of them has a default value.
  const requiredOptions = [
    'encoder',
    'decoder',
    'src-lang',
    'tgt-lang',
    'tokens',
    'input-wav',
  ];
  if (requiredOptions.any((name) => res[name] == null)) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final srcLang = res['src-lang'] as String;
  final tgtLang = res['tgt-lang'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final canary = sherpa_onnx.OfflineCanaryModelConfig(
      encoder: encoder, decoder: decoder, srcLang: srcLang, tgtLang: tgtLang);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    canary: canary,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  var config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  try {
    final waveData = sherpa_onnx.readWave(inputWav);

    // Decodes the loaded audio with the recognizer's current configuration
    // and returns the recognized text. The stream is freed even if decoding
    // throws.
    String transcribe() {
      final stream = recognizer.createStream();
      try {
        stream.acceptWaveform(
            samples: waveData.samples, sampleRate: waveData.sampleRate);
        recognizer.decode(stream);
        return recognizer.getResult(stream).text;
      } finally {
        stream.free();
      }
    }

    print('Result in $tgtLang: ${transcribe()}');

    // Example of changing the target language of an existing recognizer:
    // switch the output to English and decode the same audio again.
    if (tgtLang != 'en') {
      final json = config.toJson();

      // The casts already yield non-nullable maps, so no `!` is needed.
      final modelJson = json['model'] as Map<String, dynamic>;
      final canaryJson = modelJson['canary'] as Map<String, dynamic>;
      canaryJson['tgtLang'] = 'en';

      config = sherpa_onnx.OfflineRecognizerConfig.fromJson(json);
      recognizer.setConfig(config);

      print('Result in English: ${transcribe()}');
    }
  } finally {
    recognizer.free();
  }
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/usr/bin/env bash
#
# Downloads the NeMo Canary int8 model (if it is not already present) and runs
# the Dart example for several source/target language pairs.

set -ex

dart pub get

model_dir=sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8

if [ ! -f "$model_dir/encoder.int8.onnx" ]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_dir.tar.bz2"
  tar xvf "$model_dir.tar.bz2"
  rm "$model_dir.tar.bz2"
fi

# Usage: decode <src_lang> <tgt_lang>...
# Transcribes test_wavs/<src_lang>.wav once for every given target language.
decode() {
  local src_lang=$1
  shift

  local tgt_lang
  for tgt_lang in "$@"; do
    dart run \
      ./bin/nemo-canary.dart \
      --encoder "./$model_dir/encoder.int8.onnx" \
      --decoder "./$model_dir/decoder.int8.onnx" \
      --tokens "./$model_dir/tokens.txt" \
      --src-lang "$src_lang" \
      --tgt-lang "$tgt_lang" \
      --input-wav "./$model_dir/test_wavs/$src_lang.wav"
  done
}

decode en en de es fr
decode de en de
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright (c) 2025 Xiaomi Corporation
2+
//
3+
// This file shows how to use a NeMo Canary model for speech recognition.
4+
//
5+
// You can find the model doc at
6+
// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
7+
using SherpaOnnx;
8+
9+
/// <summary>
/// Decodes an English test wave with a NeMo Canary model, first producing
/// English text and then, after calling <c>SetConfig</c> on the same
/// recognizer, German text.
/// </summary>
class NonStreamingAsrCanary
{
  // Directory of the extracted model archive, relative to the working directory.
  private const string ModelDir = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8";

  static void Main(string[] args)
  {
    // please download model files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    var config = new OfflineRecognizerConfig();
    config.ModelConfig.Canary.Encoder = ModelDir + "/encoder.int8.onnx";
    config.ModelConfig.Canary.Decoder = ModelDir + "/decoder.int8.onnx";
    config.ModelConfig.Canary.SrcLang = "en";
    config.ModelConfig.Canary.TgtLang = "en";
    config.ModelConfig.Tokens = ModelDir + "/tokens.txt";
    config.ModelConfig.Debug = 0;

    // Dispose the recognizer deterministically when Main returns.
    using var recognizer = new OfflineRecognizer(config);

    var testWaveFilename = ModelDir + "/test_wavs/en.wav";
    var reader = new WaveReader(testWaveFilename);

    Console.WriteLine("Text (English): {0}", Transcribe(recognizer, reader));

    // Now output text in German
    config.ModelConfig.Canary.TgtLang = "de";
    recognizer.SetConfig(config);

    Console.WriteLine("Text (German): {0}", Transcribe(recognizer, reader));
  }

  /// <summary>
  /// Decodes the samples held by <paramref name="reader"/> using the
  /// recognizer's current configuration.
  /// </summary>
  /// <param name="recognizer">Recognizer used to create and decode the stream.</param>
  /// <param name="reader">Source of the samples and their sample rate.</param>
  /// <returns>The recognized text.</returns>
  private static string Transcribe(OfflineRecognizer recognizer, WaveReader reader)
  {
    // Each decode uses its own stream, disposed as soon as the text is read.
    using var stream = recognizer.CreateStream();
    stream.AcceptWaveform(reader.SampleRate, reader.Samples);
    recognizer.Decode(stream);
    return stream.Result.Text;
  }
}
43+
44+
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net8.0</TargetFramework>
6+
<RootNamespace>non_streaming_canary_decode_files</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<ProjectReference Include="..\Common\Common.csproj" />
13+
</ItemGroup>
14+
15+
</Project>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env bash
#
# Downloads the NeMo Canary int8 model (if it is not already present) and runs
# the .NET example in this directory.

set -ex

model=sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8

if [ ! -f "$model/encoder.int8.onnx" ]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model.tar.bz2"
  tar xvf "$model.tar.bz2"
  rm "$model.tar.bz2"
fi

dotnet run

dotnet-examples/sherpa-onnx.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn",
3939
EndProject
4040
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}"
4141
EndProject
42+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-canary-decode-files", "non-streaming-canary-decode-files\non-streaming-canary-decode-files.csproj", "{925779DB-4429-4366-87C3-B14DD44AE1D4}"
43+
EndProject
4244
Global
4345
GlobalSection(SolutionConfigurationPlatforms) = preSolution
4446
Debug|Any CPU = Debug|Any CPU
@@ -117,6 +119,10 @@ Global
117119
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU
118120
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU
119121
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU
122+
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
123+
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.Build.0 = Debug|Any CPU
124+
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.ActiveCfg = Release|Any CPU
125+
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.Build.0 = Release|Any CPU
120126
EndGlobalSection
121127
GlobalSection(SolutionProperties) = preSolution
122128
HideSolutionNode = FALSE

0 commit comments

Comments
 (0)