Skip to content

Commit 103e93d

Browse files
authored
Add Java and Kotlin API for NeMo Canary models (#2359)
Add support for the NeMo Canary model in both the Java and Kotlin APIs, wiring it through JNI and updating the examples and CI.

- Introduce OfflineCanaryModelConfig in Kotlin and Java with builder patterns.
- Extend OfflineRecognizer to accept and apply the new canary config via setConfig.
- Update the JNI binding (GetOfflineConfig) and the getOfflineModelConfig mapping (type 32), plus the examples and CI workflows.
1 parent df4615c commit 103e93d

File tree

12 files changed

+363
-11
lines changed

12 files changed

+363
-11
lines changed

.github/workflows/run-java-test.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,13 @@ jobs:
117117
cd ./java-api-examples
118118
./run-version-test.sh
119119
120+
- name: Run java test (Nemo Canary)
121+
shell: bash
122+
run: |
123+
cd ./java-api-examples
124+
./run-non-streaming-decode-file-nemo-canary.sh
125+
rm -rf sherpa-onnx-nemo-*
126+
120127
- name: Run java test (Non-streaming SenseVoice with homophone replacer)
121128
shell: bash
122129
run: |
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright 2024 Xiaomi Corporation
2+
3+
// This file shows how to use an offline NeMo Canary model, i.e.,
4+
// non-streaming NeMo Canary model, to decode files.
5+
import com.k2fsa.sherpa.onnx.*;
6+
7+
public class NonStreamingDecodeFileNemoCanary {
8+
public static void main(String[] args) {
9+
// please refer to
10+
// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
11+
// to download model files
12+
String encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
13+
String decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
14+
String tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
15+
16+
String waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav";
17+
18+
WaveReader reader = new WaveReader(waveFilename);
19+
20+
OfflineCanaryModelConfig canary =
21+
OfflineCanaryModelConfig.builder()
22+
.setEncoder(encoder)
23+
.setDecoder(decoder)
24+
.setSrcLang("en")
25+
.setTgtLang("en")
26+
.setUsePnc(true)
27+
.build();
28+
29+
OfflineModelConfig modelConfig =
30+
OfflineModelConfig.builder()
31+
.setCanary(canary)
32+
.setTokens(tokens)
33+
.setNumThreads(1)
34+
.setDebug(true)
35+
.build();
36+
37+
OfflineRecognizerConfig config =
38+
OfflineRecognizerConfig.builder()
39+
.setOfflineModelConfig(modelConfig)
40+
.setDecodingMethod("greedy_search")
41+
.build();
42+
43+
OfflineRecognizer recognizer = new OfflineRecognizer(config);
44+
OfflineStream stream = recognizer.createStream();
45+
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
46+
47+
recognizer.decode(stream);
48+
49+
String text = recognizer.getResult(stream).getText();
50+
51+
System.out.printf("filename:%s\nresult(English):%s\n", waveFilename, text);
52+
53+
stream.release();
54+
recognizer.release();
55+
}
56+
}

java-api-examples/README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,18 @@ This directory contains examples for the JAVA API of sherpa-onnx.
2424

2525
```bash
2626
./run-non-streaming-decode-file-dolphin-ctc.sh
27+
./run-non-streaming-decode-file-fire-red-asr.sh
28+
./run-non-streaming-decode-file-moonshine.sh
29+
./run-non-streaming-decode-file-nemo-canary.sh
30+
./run-non-streaming-decode-file-nemo.sh
2731
./run-non-streaming-decode-file-paraformer.sh
2832
./run-non-streaming-decode-file-sense-voice.sh
33+
./run-non-streaming-decode-file-tele-speech-ctc.sh
34+
./run-non-streaming-decode-file-transducer-hotwords.sh
2935
./run-non-streaming-decode-file-transducer.sh
36+
./run-non-streaming-decode-file-whisper-multiple.sh
3037
./run-non-streaming-decode-file-whisper.sh
31-
./run-non-streaming-decode-file-nemo.sh
38+
./run-non-streaming-decode-file-zipformer-ctc.sh
3239
```
3340

3441
## Non-Streaming Speech recognition with homophone replacer
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env bash
#
# Runs the Java NeMo Canary non-streaming example.
# Builds the JNI library and the Java API jar when they are missing and
# downloads the model archive on first use.

set -ex

# Configure and build the JNI library unless a .dylib or .so build already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model once; the archive is removed after extraction.
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

# The library path is quoted so that a working directory containing whitespace
# is passed to java as a single argument instead of being word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileNemoCanary.java

kotlin-api-examples/run.sh

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,8 +455,31 @@ function testOfflineSenseVoiceWithHr() {
455455
ls -lh $out_filename
456456
java -Djava.library.path=../build/lib -jar $out_filename
457457
}
458-
testVersion
459458

459+
# Compiles and runs the Kotlin NeMo Canary example.
# The model archive is fetched only when the encoder file is not already present.
function testOfflineNeMoCanary() {
  if [[ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
    tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
    rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  fi

  # Kotlin sources compiled into the runnable jar, in the same order as before.
  local -a kotlin_sources=(
    test_offline_nemo_canary.kt
    FeatureConfig.kt
    HomophoneReplacerConfig.kt
    OfflineRecognizer.kt
    OfflineStream.kt
    WaveReader.kt
    faked-asset-manager.kt
  )

  out_filename=test_offline_nemo_canary.jar
  kotlinc-jvm -include-runtime -d "$out_filename" "${kotlin_sources[@]}"

  ls -lh "$out_filename"
  java -Djava.library.path=../build/lib -jar "$out_filename"
}
479+
480+
# testVersion
481+
482+
testOfflineNeMoCanary
460483
testOfflineSenseVoiceWithHr
461484
testOfflineSpeechDenoiser
462485
testOfflineSpeakerDiarization
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package com.k2fsa.sherpa.onnx
2+
3+
/**
 * Decodes the bundled English test wave twice with the same recognizer:
 * first with the recognizer's initial config, then again after switching
 * the Canary target language to "de" via [OfflineRecognizer.setConfig].
 */
fun main() {
    val recognizer = createOfflineRecognizer()
    val waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"

    // readWaveFromFile returns an untyped array: [0] holds the samples, [1] the sample rate.
    val wave = WaveReader.readWaveFromFile(
        filename = waveFilename,
    )
    val samples: FloatArray = wave[0] as FloatArray
    val sampleRate: Int = wave[1] as Int

    // One full pass on a fresh stream: feed audio, decode, print, release the stream.
    fun decodeAndPrint(label: String) {
        val stream = recognizer.createStream()
        stream.acceptWaveform(samples, sampleRate = sampleRate)
        recognizer.decode(stream)

        val result = recognizer.getResult(stream)
        println("$label: $result")

        stream.release()
    }

    decodeAndPrint("English")

    // now output text in German
    val currentModelConfig = recognizer.config.modelConfig
    val germanConfig = recognizer.config.copy(
        modelConfig = currentModelConfig.copy(
            canary = currentModelConfig.canary.copy(tgtLang = "de")
        )
    )
    recognizer.setConfig(germanConfig)

    decodeAndPrint("German")

    recognizer.release()
}
40+
41+
42+
/**
 * Builds the recognizer used by this example.
 *
 * The model config is looked up with `getOfflineModelConfig(type = 32)`; the
 * non-null assertion makes the function throw if that lookup yields null.
 */
fun createOfflineRecognizer(): OfflineRecognizer {
    val modelConfig = getOfflineModelConfig(type = 32)!!
    val recognizerConfig = OfflineRecognizerConfig(
        modelConfig = modelConfig,
    )
    return OfflineRecognizer(config = recognizerConfig)
}

sherpa-onnx/java-api/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ java_files += OfflineFireRedAsrModelConfig.java
3434
java_files += OfflineMoonshineModelConfig.java
3535
java_files += OfflineNemoEncDecCtcModelConfig.java
3636
java_files += OfflineZipformerCtcModelConfig.java
37+
java_files += OfflineCanaryModelConfig.java
3738
java_files += OfflineSenseVoiceModelConfig.java
3839
java_files += OfflineDolphinModelConfig.java
3940
java_files += OfflineModelConfig.java
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// Copyright 2025 Xiaomi Corporation
2+
3+
package com.k2fsa.sherpa.onnx;
4+
5+
/**
 * Immutable configuration for an offline NeMo Canary model.
 *
 * <p>Instances are created through {@link #builder()}. Unset values fall back to the builder
 * defaults: empty encoder and decoder paths, {@code "en"} for both languages, and
 * {@code usePnc = true}.
 *
 * <p>NOTE(review): the native binding presumably reads these fields by name, so the field names
 * are left untouched - confirm against the JNI code before renaming anything.
 */
public class OfflineCanaryModelConfig {
  private final String encoder;
  private final String decoder;
  private final String srcLang;
  private final String tgtLang;
  private final boolean usePnc;

  /** Copies every value out of the given builder. */
  private OfflineCanaryModelConfig(Builder source) {
    encoder = source.encoder;
    decoder = source.decoder;
    srcLang = source.srcLang;
    tgtLang = source.tgtLang;
    usePnc = source.usePnc;
  }

  /** Returns a new builder pre-populated with the default values. */
  public static Builder builder() {
    return new Builder();
  }

  /** Returns the encoder model path. */
  public String getEncoder() {
    return this.encoder;
  }

  /** Returns the decoder model path. */
  public String getDecoder() {
    return this.decoder;
  }

  /** Returns the source language code. */
  public String getSrcLang() {
    return this.srcLang;
  }

  /** Returns the target language code. */
  public String getTgtLang() {
    return this.tgtLang;
  }

  /** Returns the value of the {@code usePnc} flag. */
  public boolean isUsePnc() {
    return this.usePnc;
  }

  /** Fluent builder for {@link OfflineCanaryModelConfig}; every setter returns this builder. */
  public static class Builder {
    private String encoder = "";
    private String decoder = "";
    private String srcLang = "en";
    private String tgtLang = "en";
    private boolean usePnc = true;

    public Builder setEncoder(String encoder) {
      this.encoder = encoder;
      return this;
    }

    public Builder setDecoder(String decoder) {
      this.decoder = decoder;
      return this;
    }

    public Builder setSrcLang(String srcLang) {
      this.srcLang = srcLang;
      return this;
    }

    public Builder setTgtLang(String tgtLang) {
      this.tgtLang = tgtLang;
      return this;
    }

    public Builder setUsePnc(boolean usePnc) {
      this.usePnc = usePnc;
      return this;
    }

    /** Creates the immutable config from the builder's current values. */
    public OfflineCanaryModelConfig build() {
      return new OfflineCanaryModelConfig(this);
    }
  }
}

sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ public class OfflineModelConfig {
1212
private final OfflineSenseVoiceModelConfig senseVoice;
1313
private final OfflineDolphinModelConfig dolphin;
1414
private final OfflineZipformerCtcModelConfig zipformerCtc;
15+
private final OfflineCanaryModelConfig canary;
1516
private final String teleSpeech;
1617
private final String tokens;
1718
private final int numThreads;
@@ -30,6 +31,7 @@ private OfflineModelConfig(Builder builder) {
3031
this.moonshine = builder.moonshine;
3132
this.nemo = builder.nemo;
3233
this.zipformerCtc = builder.zipformerCtc;
34+
this.canary = builder.canary;
3335
this.senseVoice = builder.senseVoice;
3436
this.dolphin = builder.dolphin;
3537
this.teleSpeech = builder.teleSpeech;
@@ -78,6 +80,10 @@ public OfflineZipformerCtcModelConfig getZipformerCtc() {
7880
return zipformerCtc;
7981
}
8082

83+
public OfflineCanaryModelConfig getCanary() {
84+
return canary;
85+
}
86+
8187
public String getTokens() {
8288
return tokens;
8389
}
@@ -120,6 +126,7 @@ public static class Builder {
120126
private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build();
121127
private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build();
122128
private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build();
129+
private OfflineCanaryModelConfig canary = OfflineCanaryModelConfig.builder().build();
123130
private String teleSpeech = "";
124131
private String tokens = "";
125132
private int numThreads = 1;
@@ -158,6 +165,11 @@ public Builder setZipformerCtc(OfflineZipformerCtcModelConfig zipformerCtc) {
158165
return this;
159166
}
160167

168+
public Builder setCanary(OfflineCanaryModelConfig canary) {
169+
this.canary = canary;
170+
return this;
171+
}
172+
161173
public Builder setTeleSpeech(String teleSpeech) {
162174
this.teleSpeech = teleSpeech;
163175
return this;

sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,22 @@
44

55
public class OfflineRecognizer {
66
private long ptr = 0;
7+
private final OfflineRecognizerConfig config;
78

89
/**
 * Creates the native recognizer from {@code config} and remembers that config so it can be
 * returned by {@link #getConfig()}.
 *
 * @param config the recognizer configuration passed to the native layer
 */
public OfflineRecognizer(OfflineRecognizerConfig config) {
  // Make sure the native library is loaded before the first native call.
  LibraryLoader.maybeLoad();

  this.ptr = newFromFile(config);
  this.config = config;
}
15+
16+
public void setConfig(OfflineRecognizerConfig config) {
17+
setConfig(ptr, config);
18+
// we don't update this.config
19+
}
20+
21+
public OfflineRecognizerConfig getConfig() {
22+
return config;
1123
}
1224

1325
public void decode(OfflineStream s) {
@@ -60,6 +72,8 @@ public OfflineRecognizerResult getResult(OfflineStream s) {
6072

6173
private native void decode(long ptr, long streamPtr);
6274

75+
private native void setConfig(long ptr, OfflineRecognizerConfig config);
76+
6377
private native void decodeStreams(long ptr, long[] streamPtrs);
6478

6579
private native Object[] getResult(long streamPtr);

0 commit comments

Comments
 (0)