Skip to content

Commit ceb1bc5

Browse files
authored
Add C API for ten-vad (#2379)
1 parent da9f303 commit ceb1bc5

File tree

9 files changed

+302
-34
lines changed

9 files changed

+302
-34
lines changed

.github/workflows/c-api.yaml

Lines changed: 96 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -376,7 +376,7 @@ jobs:
376376
name: matcha-tts-${{ matrix.os }}
377377
path: ./generated-matcha-*.wav
378378

379-
- name: Test vad + Whisper tiny.en
379+
- name: Test silero-vad + Whisper tiny.en
380380
shell: bash
381381
run: |
382382
gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \
@@ -403,7 +403,34 @@ jobs:
403403
rm -rf *.onnx
404404
rm *.wav
405405
406-
- name: Test vad + Moonshine
406+
- name: Test ten-vad + Whisper tiny.en
407+
shell: bash
408+
run: |
409+
gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \
410+
-I ./build/install/include \
411+
-L ./build/install/lib/ \
412+
-l sherpa-onnx-c-api \
413+
-l onnxruntime
414+
415+
# Now download models
416+
#
417+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
418+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
419+
420+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
421+
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
422+
rm sherpa-onnx-whisper-tiny.en.tar.bz2
423+
424+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
425+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
426+
427+
./vad-whisper-c-api
428+
429+
rm -rf sherpa-onnx-*
430+
rm -rf *.onnx
431+
rm *.wav
432+
433+
- name: Test silero-vad + Moonshine
407434
shell: bash
408435
run: |
409436
gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \
@@ -430,6 +457,33 @@ jobs:
430457
rm -rf *.onnx
431458
rm *.wav
432459
460+
- name: Test ten-vad + Moonshine
461+
shell: bash
462+
run: |
463+
gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \
464+
-I ./build/install/include \
465+
-L ./build/install/lib/ \
466+
-l sherpa-onnx-c-api \
467+
-l onnxruntime
468+
469+
# Now download models
470+
#
471+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
472+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
473+
474+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
475+
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
476+
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
477+
478+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
479+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
480+
481+
./vad-moonshine-c-api
482+
483+
rm -rf sherpa-onnx-*
484+
rm -rf *.onnx
485+
rm *.wav
486+
433487
- name: Test Moonshine
434488
shell: bash
435489
run: |
@@ -466,7 +520,7 @@ jobs:
466520
./run.sh
467521
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
468522
469-
- name: Test vad + sense-voice
523+
- name: Test silero-vad + sense-voice
470524
shell: bash
471525
run: |
472526
gcc -o vad-sense-voice-c-api ./c-api-examples/vad-sense-voice-c-api.c \
@@ -505,6 +559,45 @@ jobs:
505559
rm -rf *.onnx
506560
rm *.wav
507561
562+
- name: Test ten-vad + sense-voice
563+
shell: bash
564+
run: |
565+
gcc -o vad-sense-voice-c-api ./c-api-examples/vad-sense-voice-c-api.c \
566+
-I ./build/install/include \
567+
-L ./build/install/lib/ \
568+
-l sherpa-onnx-c-api \
569+
-l onnxruntime
570+
571+
ls -lh vad-sense-voice-c-api
572+
573+
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
574+
ldd ./vad-sense-voice-c-api
575+
echo "----"
576+
readelf -d ./vad-sense-voice-c-api
577+
fi
578+
579+
# Now download models
580+
#
581+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
582+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
583+
584+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
585+
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
586+
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
587+
588+
ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
589+
echo "---"
590+
ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs
591+
592+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
593+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
594+
595+
./vad-sense-voice-c-api
596+
597+
rm -rf sherpa-onnx-sense-voice-*
598+
rm -rf *.onnx
599+
rm *.wav
600+
508601
- name: Test sense-voice
509602
shell: bash
510603
run: |

c-api-examples/vad-moonshine-c-api.c

Lines changed: 46 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,12 @@
66
// This file demonstrates how to use VAD + Moonshine with sherpa-onnx's C API.
77
// clang-format off
88
//
9-
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
9+
// To use silero-vad:
10+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
11+
//
12+
// To use ten-vad:
13+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
14+
//
1015
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
1116
//
1217
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
@@ -23,7 +28,27 @@
2328

2429
int32_t main() {
2530
const char *wav_filename = "./Obama.wav";
26-
const char *vad_filename = "./silero_vad.onnx";
31+
if (!SherpaOnnxFileExists(wav_filename)) {
32+
fprintf(stderr, "Please download %s\n", wav_filename);
33+
return -1;
34+
}
35+
36+
const char *vad_filename;
37+
int32_t use_silero_vad = 0;
38+
int32_t use_ten_vad = 0;
39+
40+
if (SherpaOnnxFileExists("./silero_vad.onnx")) {
41+
printf("Use silero-vad\n");
42+
vad_filename = "./silero_vad.onnx";
43+
use_silero_vad = 1;
44+
} else if (SherpaOnnxFileExists("./ten-vad.onnx")) {
45+
printf("Use ten-vad\n");
46+
vad_filename = "./ten-vad.onnx";
47+
use_ten_vad = 1;
48+
} else {
49+
fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
50+
return -1;
51+
}
2752

2853
const char *preprocessor =
2954
"./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
@@ -76,12 +101,22 @@ int32_t main() {
76101

77102
SherpaOnnxVadModelConfig vadConfig;
78103
memset(&vadConfig, 0, sizeof(vadConfig));
79-
vadConfig.silero_vad.model = vad_filename;
80-
vadConfig.silero_vad.threshold = 0.5;
81-
vadConfig.silero_vad.min_silence_duration = 0.5;
82-
vadConfig.silero_vad.min_speech_duration = 0.5;
83-
vadConfig.silero_vad.max_speech_duration = 10;
84-
vadConfig.silero_vad.window_size = 512;
104+
if (use_silero_vad) {
105+
vadConfig.silero_vad.model = vad_filename;
106+
vadConfig.silero_vad.threshold = 0.25;
107+
vadConfig.silero_vad.min_silence_duration = 0.5;
108+
vadConfig.silero_vad.min_speech_duration = 0.5;
109+
vadConfig.silero_vad.max_speech_duration = 10;
110+
vadConfig.silero_vad.window_size = 512;
111+
} else if (use_ten_vad) {
112+
vadConfig.ten_vad.model = vad_filename;
113+
vadConfig.ten_vad.threshold = 0.25;
114+
vadConfig.ten_vad.min_silence_duration = 0.5;
115+
vadConfig.ten_vad.min_speech_duration = 0.5;
116+
vadConfig.ten_vad.max_speech_duration = 10;
117+
vadConfig.ten_vad.window_size = 256;
118+
}
119+
85120
vadConfig.sample_rate = 16000;
86121
vadConfig.num_threads = 1;
87122
vadConfig.debug = 1;
@@ -96,7 +131,9 @@ int32_t main() {
96131
return -1;
97132
}
98133

99-
int32_t window_size = vadConfig.silero_vad.window_size;
134+
int32_t window_size = use_silero_vad ? vadConfig.silero_vad.window_size
135+
: vadConfig.ten_vad.window_size;
136+
100137
int32_t i = 0;
101138
int is_eof = 0;
102139

c-api-examples/vad-sense-voice-c-api.c

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@
66
// This file demonstrates how to use VAD + SenseVoice with sherpa-onnx's C API.
77
// clang-format off
88
//
9-
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
9+
// To use silero-vad:
10+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
11+
//
12+
// To use ten-vad:
13+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
14+
//
1015
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
1116
//
1217
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
@@ -23,7 +28,28 @@
2328

2429
int32_t main() {
2530
const char *wav_filename = "./lei-jun-test.wav";
26-
const char *vad_filename = "./silero_vad.onnx";
31+
if (!SherpaOnnxFileExists(wav_filename)) {
32+
fprintf(stderr, "Please download %s\n", wav_filename);
33+
return -1;
34+
}
35+
36+
const char *vad_filename;
37+
int32_t use_silero_vad = 0;
38+
int32_t use_ten_vad = 0;
39+
40+
if (SherpaOnnxFileExists("./silero_vad.onnx")) {
41+
printf("Use silero-vad\n");
42+
vad_filename = "./silero_vad.onnx";
43+
use_silero_vad = 1;
44+
} else if (SherpaOnnxFileExists("./ten-vad.onnx")) {
45+
printf("Use ten-vad\n");
46+
vad_filename = "./ten-vad.onnx";
47+
use_ten_vad = 1;
48+
} else {
49+
fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
50+
return -1;
51+
}
52+
2753
const char *model_filename =
2854
"./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
2955
const char *tokens_filename =
@@ -77,12 +103,23 @@ int32_t main() {
77103

78104
SherpaOnnxVadModelConfig vadConfig;
79105
memset(&vadConfig, 0, sizeof(vadConfig));
80-
vadConfig.silero_vad.model = vad_filename;
81-
vadConfig.silero_vad.threshold = 0.5;
82-
vadConfig.silero_vad.min_silence_duration = 0.5;
83-
vadConfig.silero_vad.min_speech_duration = 0.5;
84-
vadConfig.silero_vad.max_speech_duration = 5;
85-
vadConfig.silero_vad.window_size = 512;
106+
107+
if (use_silero_vad) {
108+
vadConfig.silero_vad.model = vad_filename;
109+
vadConfig.silero_vad.threshold = 0.25;
110+
vadConfig.silero_vad.min_silence_duration = 0.5;
111+
vadConfig.silero_vad.min_speech_duration = 0.5;
112+
vadConfig.silero_vad.max_speech_duration = 10;
113+
vadConfig.silero_vad.window_size = 512;
114+
} else if (use_ten_vad) {
115+
vadConfig.ten_vad.model = vad_filename;
116+
vadConfig.ten_vad.threshold = 0.25;
117+
vadConfig.ten_vad.min_silence_duration = 0.5;
118+
vadConfig.ten_vad.min_speech_duration = 0.5;
119+
vadConfig.ten_vad.max_speech_duration = 10;
120+
vadConfig.ten_vad.window_size = 256;
121+
}
122+
86123
vadConfig.sample_rate = 16000;
87124
vadConfig.num_threads = 1;
88125
vadConfig.debug = 1;
@@ -97,7 +134,8 @@ int32_t main() {
97134
return -1;
98135
}
99136

100-
int32_t window_size = vadConfig.silero_vad.window_size;
137+
int32_t window_size = use_silero_vad ? vadConfig.silero_vad.window_size
138+
: vadConfig.ten_vad.window_size;
101139
int32_t i = 0;
102140
int is_eof = 0;
103141

c-api-examples/vad-whisper-c-api.c

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@
88
//
99
// clang-format off
1010
//
11-
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
11+
// To use silero-vad:
12+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
13+
//
14+
// To use ten-vad:
15+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
16+
//
1217
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
1318
//
1419
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
@@ -25,7 +30,28 @@
2530

2631
int32_t main() {
2732
const char *wav_filename = "./Obama.wav";
28-
const char *vad_filename = "./silero_vad.onnx";
33+
34+
if (!SherpaOnnxFileExists(wav_filename)) {
35+
fprintf(stderr, "Please download %s\n", wav_filename);
36+
return -1;
37+
}
38+
39+
const char *vad_filename;
40+
int32_t use_silero_vad = 0;
41+
int32_t use_ten_vad = 0;
42+
43+
if (SherpaOnnxFileExists("./silero_vad.onnx")) {
44+
printf("Use silero-vad\n");
45+
vad_filename = "./silero_vad.onnx";
46+
use_silero_vad = 1;
47+
} else if (SherpaOnnxFileExists("./ten-vad.onnx")) {
48+
printf("Use ten-vad\n");
49+
vad_filename = "./ten-vad.onnx";
50+
use_ten_vad = 1;
51+
} else {
52+
fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
53+
return -1;
54+
}
2955

3056
const char *encoder = "sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
3157
const char *decoder = "sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
@@ -74,12 +100,23 @@ int32_t main() {
74100

75101
SherpaOnnxVadModelConfig vadConfig;
76102
memset(&vadConfig, 0, sizeof(vadConfig));
77-
vadConfig.silero_vad.model = vad_filename;
78-
vadConfig.silero_vad.threshold = 0.5;
79-
vadConfig.silero_vad.min_silence_duration = 0.5;
80-
vadConfig.silero_vad.min_speech_duration = 0.5;
81-
vadConfig.silero_vad.max_speech_duration = 10;
82-
vadConfig.silero_vad.window_size = 512;
103+
104+
if (use_silero_vad) {
105+
vadConfig.silero_vad.model = vad_filename;
106+
vadConfig.silero_vad.threshold = 0.25;
107+
vadConfig.silero_vad.min_silence_duration = 0.5;
108+
vadConfig.silero_vad.min_speech_duration = 0.5;
109+
vadConfig.silero_vad.max_speech_duration = 10;
110+
vadConfig.silero_vad.window_size = 512;
111+
} else if (use_ten_vad) {
112+
vadConfig.ten_vad.model = vad_filename;
113+
vadConfig.ten_vad.threshold = 0.25;
114+
vadConfig.ten_vad.min_silence_duration = 0.5;
115+
vadConfig.ten_vad.min_speech_duration = 0.5;
116+
vadConfig.ten_vad.max_speech_duration = 10;
117+
vadConfig.ten_vad.window_size = 256;
118+
}
119+
83120
vadConfig.sample_rate = 16000;
84121
vadConfig.num_threads = 1;
85122
vadConfig.debug = 1;
@@ -94,7 +131,8 @@ int32_t main() {
94131
return -1;
95132
}
96133

97-
int32_t window_size = vadConfig.silero_vad.window_size;
134+
int32_t window_size = use_silero_vad ? vadConfig.silero_vad.window_size
135+
: vadConfig.ten_vad.window_size;
98136
int32_t i = 0;
99137
int is_eof = 0;
100138

0 commit comments

Comments
 (0)