Skip to content

Commit 6122a67

Browse files
authored
Refactor exporting NeMo models (#2362)
Refactors and extends model export support to include new NeMo Parakeet TDT int8 variants for English and Japanese, updating the Kotlin API, export scripts, test runners, and CI workflows.
- Adds support for two new int8 model types in OfflineRecognizer.kt.
- Enhances the Python export scripts to perform dynamic quantization and metadata injection.
- Updates shell scripts and GitHub workflows to package, test, and publish int8 model artifacts.
1 parent f140577 commit 6122a67

19 files changed

+671
-23
lines changed

.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ jobs:
6161
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
6262
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
6363
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
64+
sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
65+
sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
66+
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
67+
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
68+
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
6469
)
6570
6671
for m in ${models[@]}; do
@@ -89,6 +94,11 @@ jobs:
8994
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
9095
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
9196
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
97+
sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
98+
sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
99+
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
100+
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
101+
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
92102
)
93103
for d in ${dirs[@]}; do
94104
tar cjvf ${d}.tar.bz2 ./$d

.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc.yaml

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,18 @@ jobs:
5454
curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
5555
popd
5656
57-
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
58-
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
59-
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
60-
61-
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
62-
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
63-
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
57+
names=(
58+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
59+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
60+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
61+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
62+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
63+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
64+
)
65+
for d in ${names[@]}; do
66+
cp -av test_wavs $d/
67+
tar cjvf $d.tar.bz2 $d
68+
done
6469
6570
- name: Release
6671
uses: svenstaro/upload-release-action@v2
@@ -71,3 +76,41 @@ jobs:
7176
repo_name: k2-fsa/sherpa-onnx
7277
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
7378
tag: asr-models
79+
80+
- name: Publish to huggingface
81+
env:
82+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
83+
uses: nick-fields/retry@v3
84+
with:
85+
max_attempts: 20
86+
timeout_seconds: 200
87+
shell: bash
88+
command: |
89+
git config --global user.email "csukuangfj@gmail.com"
90+
git config --global user.name "Fangjun Kuang"
91+
92+
models=(
93+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
94+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
95+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
96+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
97+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
98+
sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
99+
)
100+
101+
for m in ${models[@]}; do
102+
rm -rf huggingface
103+
export GIT_LFS_SKIP_SMUDGE=1
104+
export GIT_CLONE_PROTECTION_ACTIVE=false
105+
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
106+
cp -av $m/* huggingface
107+
cd huggingface
108+
git lfs track "*.onnx"
109+
git lfs track "*.wav"
110+
git status
111+
git add .
112+
git status
113+
git commit -m "first commit"
114+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
115+
cd ..
116+
done

.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ jobs:
6161
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
6262
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
6363
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
64+
sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
65+
sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
66+
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
67+
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
68+
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
6469
)
6570
6671
for m in ${models[@]}; do
@@ -88,6 +93,11 @@ jobs:
8893
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
8994
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
9095
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
96+
sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
97+
sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
98+
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
99+
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
100+
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
91101
)
92102
for d in ${dirs[@]}; do
93103
tar cjvf ${d}.tar.bz2 ./$d

.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer.yaml

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,18 @@ jobs:
5454
curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
5555
popd
5656
57-
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
58-
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
59-
cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
60-
61-
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
62-
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
63-
tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
57+
models=(
58+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
59+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
60+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
61+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
62+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
63+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
64+
)
65+
for m in ${models[@]}; do
66+
cp -av test_wavs $m
67+
tar cjvf $m.tar.bz2 $m
68+
done
6469
6570
- name: Release
6671
uses: svenstaro/upload-release-action@v2
@@ -71,3 +76,41 @@ jobs:
7176
repo_name: k2-fsa/sherpa-onnx
7277
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
7378
tag: asr-models
79+
80+
- name: Publish to huggingface
81+
env:
82+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
83+
uses: nick-fields/retry@v3
84+
with:
85+
max_attempts: 20
86+
timeout_seconds: 200
87+
shell: bash
88+
command: |
89+
git config --global user.email "csukuangfj@gmail.com"
90+
git config --global user.name "Fangjun Kuang"
91+
92+
models=(
93+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
94+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
95+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
96+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
97+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
98+
sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
99+
)
100+
101+
for m in ${models[@]}; do
102+
rm -rf huggingface
103+
export GIT_LFS_SKIP_SMUDGE=1
104+
export GIT_CLONE_PROTECTION_ACTIVE=false
105+
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
106+
cp -av $m/* huggingface
107+
cd huggingface
108+
git lfs track "*.onnx"
109+
git lfs track "*.wav"
110+
git status
111+
git add .
112+
git status
113+
git commit -m "first commit"
114+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
115+
cd ..
116+
done
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
name: export-nemo-parakeet-tdt
2+
3+
on:
4+
push:
5+
branches:
6+
- refactor-export-nemo
7+
workflow_dispatch:
8+
9+
concurrency:
10+
group: export-nemo-parakeet-tdt-${{ github.ref }}
11+
cancel-in-progress: true
12+
13+
jobs:
14+
export-nemo-parakeet-tdt-0_6b-v2:
15+
if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
16+
name: parakeet tdt
17+
runs-on: ${{ matrix.os }}
18+
strategy:
19+
fail-fast: false
20+
matrix:
21+
os: [macos-latest]
22+
python-version: ["3.10"]
23+
24+
steps:
25+
- uses: actions/checkout@v4
26+
27+
- name: Setup Python ${{ matrix.python-version }}
28+
uses: actions/setup-python@v5
29+
with:
30+
python-version: ${{ matrix.python-version }}
31+
32+
- name: Install python dependencies
33+
shell: bash
34+
run: |
35+
pip install \
36+
nemo_toolkit['asr'] \
37+
"numpy<2" \
38+
ipython \
39+
kaldi-native-fbank \
40+
librosa \
41+
onnx==1.17.0 \
42+
onnxmltools==1.13.0 \
43+
onnxruntime==1.17.1 \
44+
soundfile
45+
46+
- name: Run
47+
shell: bash
48+
run: |
49+
cd scripts/nemo/parakeet-tdt_ctc-0.6b-ja
50+
./run-ctc.sh
51+
52+
- name: Collect files
53+
shell: bash
54+
run: |
55+
models=(
56+
sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
57+
)
58+
for m in ${models[@]}; do
59+
mv -v scripts/nemo/parakeet-tdt_ctc-0.6b-ja/$m .
60+
tar cjfv $m.tar.bz2 $m
61+
done
62+
63+
64+
- name: Publish to huggingface
65+
env:
66+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
67+
uses: nick-fields/retry@v3
68+
with:
69+
max_attempts: 20
70+
timeout_seconds: 200
71+
shell: bash
72+
command: |
73+
git config --global user.email "csukuangfj@gmail.com"
74+
git config --global user.name "Fangjun Kuang"
75+
76+
models=(
77+
sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
78+
)
79+
80+
for m in ${models[@]}; do
81+
rm -rf huggingface
82+
export GIT_LFS_SKIP_SMUDGE=1
83+
export GIT_CLONE_PROTECTION_ACTIVE=false
84+
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
85+
cp -av $m/* huggingface
86+
cd huggingface
87+
git lfs track "*.onnx"
88+
git lfs track "*.wav"
89+
git status
90+
git add .
91+
git status
92+
git commit -m "first commit"
93+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
94+
cd ..
95+
done
96+
97+
- name: Release
98+
uses: svenstaro/upload-release-action@v2
99+
with:
100+
file_glob: true
101+
file: ./*.tar.bz2
102+
overwrite: true
103+
repo_name: k2-fsa/sherpa-onnx
104+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
105+
tag: asr-models

scripts/apk/generate-vad-asr-apk-script.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,38 @@ def get_models():
565565
566566
ls -lh
567567
568+
popd
569+
""",
570+
),
571+
Model(
572+
model_name="sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8",
573+
idx=33,
574+
lang="en",
575+
lang2="English",
576+
short_name="parakeet_tdt_ctc_110m",
577+
cmd="""
578+
pushd $model_name
579+
580+
rm -rfv test_wavs
581+
582+
ls -lh
583+
584+
popd
585+
""",
586+
),
587+
Model(
588+
model_name="sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8",
589+
idx=34,
590+
lang="ja",
591+
lang2="Japanese",
592+
short_name="parakeet-tdt_ctc_0.6b_ja",
593+
cmd="""
594+
pushd $model_name
595+
596+
rm -rfv test_wavs
597+
598+
ls -lh
599+
568600
popd
569601
""",
570602
),

scripts/nemo/fast-conformer-hybrid-transducer-ctc/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,6 @@ This folder contains scripts for exporting models from
2323
- https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc
2424

2525
- https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/parakeet-tdt_ctc-110m
26+
- https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja
2627

2728
to `sherpa-onnx`.

scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc-non-streaming.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import nemo.collections.asr as nemo_asr
77
import onnx
88
import torch
9+
from onnxruntime.quantization import QuantType, quantize_dynamic
910

1011

1112
def get_args():
@@ -86,6 +87,12 @@ def main():
8687
}
8788
add_meta_data(filename, meta_data)
8889

90+
quantize_dynamic(
91+
model_input="./model.onnx",
92+
model_output="./model.int8.onnx",
93+
weight_type=QuantType.QUInt8,
94+
)
95+
8996
print("preprocessor", asr_model.cfg.preprocessor)
9097
print(meta_data)
9198

scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import nemo.collections.asr as nemo_asr
77
import onnx
88
import torch
9+
from onnxruntime.quantization import QuantType, quantize_dynamic
910

1011

1112
def get_args():
@@ -114,6 +115,11 @@ def main():
114115
"comment": "Only the CTC branch is exported",
115116
}
116117
add_meta_data(filename, meta_data)
118+
quantize_dynamic(
119+
model_input="./model.onnx",
120+
model_output="./model.int8.onnx",
121+
weight_type=QuantType.QUInt8,
122+
)
117123

118124
print(meta_data)
119125

scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer-non-streaming.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import nemo.collections.asr as nemo_asr
77
import onnx
88
import torch
9+
from onnxruntime.quantization import QuantType, quantize_dynamic
910

1011

1112
def get_args():
@@ -90,6 +91,13 @@ def main():
9091
}
9192
add_meta_data("encoder.onnx", meta_data)
9293

94+
for m in ["encoder", "decoder", "joiner"]:
95+
quantize_dynamic(
96+
model_input=f"{m}.onnx",
97+
model_output=f"{m}.int8.onnx",
98+
weight_type=QuantType.QUInt8,
99+
)
100+
93101
print(meta_data)
94102

95103

0 commit comments

Comments
 (0)