Skip to content

Commit 2778498

Browse files
authored
Add JavaScript (node-addon) API for ten-vad (#2383)
This PR adds support for the new ten-vad model in both the Node.js addon examples and the HarmonyOS wrapper.

- Introduce `TenVadConfig` alongside the existing `SileroVadConfig` and extend the `VadConfig` API.
- Update the C++ addon to parse the ten-vad parameters and pass them through to the detector.
- Modify the Node.js example scripts to let users switch between silero-vad and ten-vad, and to normalize generated filenames (replacing `:` with `-`).
1 parent fb1c35a commit 2778498

13 files changed

+130
-34
lines changed

harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";
22

3-
export { CircularBuffer, SileroVadConfig, SpeechSegment, Vad, VadConfig, } from './src/main/ets/components/Vad';
3+
export { CircularBuffer, SileroVadConfig, TenVadConfig, SpeechSegment, Vad, VadConfig, } from './src/main/ets/components/Vad';
44

55

66
export { Samples,

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,25 @@ static SherpaOnnxSileroVadModelConfig GetSileroVadConfig(
294294
return c;
295295
}
296296

297+
static SherpaOnnxTenVadModelConfig GetTenVadConfig(const Napi::Object &obj) {
298+
SherpaOnnxTenVadModelConfig c;
299+
memset(&c, 0, sizeof(c));
300+
301+
if (!obj.Has("tenVad") || !obj.Get("tenVad").IsObject()) {
302+
return c;
303+
}
304+
305+
Napi::Object o = obj.Get("tenVad").As<Napi::Object>();
306+
SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
307+
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(threshold, threshold);
308+
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_silence_duration, minSilenceDuration);
309+
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_speech_duration, minSpeechDuration);
310+
SHERPA_ONNX_ASSIGN_ATTR_INT32(window_size, windowSize);
311+
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(max_speech_duration, maxSpeechDuration);
312+
313+
return c;
314+
}
315+
297316
static Napi::External<SherpaOnnxVoiceActivityDetector>
298317
CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) {
299318
Napi::Env env = info.Env();
@@ -339,6 +358,7 @@ CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) {
339358
SherpaOnnxVadModelConfig c;
340359
memset(&c, 0, sizeof(c));
341360
c.silero_vad = GetSileroVadConfig(o);
361+
c.ten_vad = GetTenVadConfig(o);
342362

343363
SHERPA_ONNX_ASSIGN_ATTR_INT32(sample_rate, sampleRate);
344364
SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
@@ -369,6 +389,7 @@ CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) {
369389
SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds);
370390
#endif
371391
SHERPA_ONNX_DELETE_C_STR(c.silero_vad.model);
392+
SHERPA_ONNX_DELETE_C_STR(c.ten_vad.model);
372393
SHERPA_ONNX_DELETE_C_STR(c.provider);
373394

374395
return Napi::External<SherpaOnnxVoiceActivityDetector>::New(

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/Vad.ets

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,48 @@ export class SileroVadConfig {
2323
public minSpeechDuration: number;
2424
public minSilenceDuration: number;
2525
public windowSize: number;
26+
public maxSpeechDuration: number;
2627

2728
public constructor(model: string, threshold: number, minSpeechDuration: number, minSilenceDuration: number,
28-
windowSize: number) {
29+
windowSize: number, maxSpeechDuration: number = 20) {
2930
this.model = model;
3031
this.threshold = threshold;
3132
this.minSpeechDuration = minSpeechDuration;
3233
this.minSilenceDuration = minSilenceDuration;
3334
this.windowSize = windowSize;
35+
this.maxSpeechDuration = maxSpeechDuration
36+
}
37+
}
38+
39+
export class TenVadConfig {
40+
public model: string;
41+
public threshold: number;
42+
public minSpeechDuration: number;
43+
public minSilenceDuration: number;
44+
public windowSize: number;
45+
public maxSpeechDuration: number;
46+
47+
public constructor(model: string, threshold: number, minSpeechDuration: number, minSilenceDuration: number,
48+
windowSize: number, maxSpeechDuration: number = 20) {
49+
this.model = model;
50+
this.threshold = threshold;
51+
this.minSpeechDuration = minSpeechDuration;
52+
this.minSilenceDuration = minSilenceDuration;
53+
this.windowSize = windowSize;
54+
this.maxSpeechDuration = maxSpeechDuration
3455
}
3556
}
3657

3758
export class VadConfig {
3859
public sileroVad: SileroVadConfig;
60+
public tenVad: TenVadConfig;
3961
public sampleRate: number;
4062
public debug: boolean;
4163
public numThreads: number;
4264

43-
public constructor(sileroVad: SileroVadConfig, sampleRate: number, debug: boolean, numThreads: number) {
65+
public constructor(sileroVad: SileroVadConfig, tenVad: TenVadConfig, sampleRate: number, debug: boolean, numThreads: number) {
4466
this.sileroVad = sileroVad;
67+
this.tenVad = tenVad;
4568
this.sampleRate = sampleRate;
4669
this.debug = debug;
4770
this.numThreads = numThreads;
@@ -127,4 +150,4 @@ export class Vad {
127150
flush(): void {
128151
voiceActivityDetectorFlush(this.handle);
129152
}
130-
}
153+
}

harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/NonStreamingAsrWithVadWorker.ets

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
OnlineRecognizerResult,
77
readWaveFromBinary,
88
SileroVadConfig,
9+
TenVadConfig,
910
SpeechSegment,
1011
Vad,
1112
VadConfig,
@@ -31,6 +32,13 @@ function initVad(context: Context): Vad {
3132
0.5,
3233
512,
3334
),
35+
new TenVadConfig(
36+
'', // set it to ten-vad.onnx to use ten-vad
37+
0.5,
38+
0.25,
39+
0.5,
40+
256,
41+
),
3442
16000,
3543
true,
3644
1,
@@ -93,7 +101,12 @@ function decodeFile(filename: string): string {
93101
console.log(`samples length ${wave.samples.length}`);
94102
const resultList: string[] = [];
95103

96-
const windowSize: number = vad.config.sileroVad.windowSize;
104+
let windowSize: number = vad.config.sileroVad.windowSize;
105+
106+
if (vad.config.tenVad.model != '') {
107+
windowSize = vad.config.tenVad.windowSize;
108+
}
109+
97110
for (let i = 0; i < wave.samples.length; i += windowSize) {
98111
const thisWindow: Float32Array = wave.samples.subarray(i, i + windowSize)
99112
vad.acceptWaveform(thisWindow);
@@ -138,7 +151,12 @@ function decodeFile(filename: string): string {
138151
function decodeMic(samples: Float32Array) {
139152
const resultList: string[] = [];
140153

141-
const windowSize: number = vad.config.sileroVad.windowSize;
154+
let windowSize: number = vad.config.sileroVad.windowSize;
155+
156+
if (vad.config.tenVad.model != '') {
157+
windowSize = vad.config.tenVad.windowSize;
158+
}
159+
142160
for (let i = 0; i < samples.length; i += windowSize) {
143161
const thisWindow: Float32Array = samples.subarray(i, i + windowSize)
144162
vad.acceptWaveform(thisWindow);

nodejs-addon-examples/test_vad_asr_non_streaming_moonshine_microphone.js

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,11 @@ ai.on('data', data => {
9797
console.log(`${index}: ${text}`);
9898

9999
const filename = `${index}-${text}-${
100-
new Date()
101-
.toLocaleTimeString('en-US', {hour12: false})
102-
.split(' ')[0]}.wav`;
100+
new Date()
101+
.toLocaleTimeString('en-US', {hour12: false})
102+
.split(' ')[0]}.wav`
103+
.replace(/:/g, '-');
104+
103105
sherpa_onnx.writeWave(
104106
filename,
105107
{samples: segment.samples, sampleRate: vad.config.sampleRate});

nodejs-addon-examples/test_vad_asr_non_streaming_nemo_ctc_microphone.js

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,11 @@ ai.on('data', data => {
9494
console.log(`${index}: ${text}`);
9595

9696
const filename = `${index}-${text}-${
97-
new Date()
98-
.toLocaleTimeString('en-US', {hour12: false})
99-
.split(' ')[0]}.wav`;
97+
new Date()
98+
.toLocaleTimeString('en-US', {hour12: false})
99+
.split(' ')[0]}.wav`
100+
.replace(/:/g, '-');
101+
100102
sherpa_onnx.writeWave(
101103
filename,
102104
{samples: segment.samples, sampleRate: vad.config.sampleRate});

nodejs-addon-examples/test_vad_asr_non_streaming_paraformer_microphone.js

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,11 @@ ai.on('data', data => {
9292
console.log(`${index}: ${text}`);
9393

9494
const filename = `${index}-${text}-${
95-
new Date()
96-
.toLocaleTimeString('en-US', {hour12: false})
97-
.split(' ')[0]}.wav`;
95+
new Date()
96+
.toLocaleTimeString('en-US', {hour12: false})
97+
.split(' ')[0]}.wav`
98+
.replace(/:/g, '-');
99+
98100
sherpa_onnx.writeWave(
99101
filename,
100102
{samples: segment.samples, sampleRate: vad.config.sampleRate});

nodejs-addon-examples/test_vad_asr_non_streaming_sense_voice_microphone.js

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,11 @@ ai.on('data', data => {
9595
console.log(`${index}: ${text}`);
9696

9797
const filename = `${index}-${text}-${
98-
new Date()
99-
.toLocaleTimeString('en-US', {hour12: false})
100-
.split(' ')[0]}.wav`;
98+
new Date()
99+
.toLocaleTimeString('en-US', {hour12: false})
100+
.split(' ')[0]}.wav`
101+
.replace(/:/g, '-');
102+
101103
sherpa_onnx.writeWave(
102104
filename,
103105
{samples: segment.samples, sampleRate: vad.config.sampleRate});

nodejs-addon-examples/test_vad_asr_non_streaming_transducer_microphone.js

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,11 @@ ai.on('data', data => {
9797
console.log(`${index}: ${text}`);
9898

9999
const filename = `${index}-${text}-${
100-
new Date()
101-
.toLocaleTimeString('en-US', {hour12: false})
102-
.split(' ')[0]}.wav`;
100+
new Date()
101+
.toLocaleTimeString('en-US', {hour12: false})
102+
.split(' ')[0]}.wav`
103+
.replace(/:/g, '-');
104+
103105
sherpa_onnx.writeWave(
104106
filename,
105107
{samples: segment.samples, sampleRate: vad.config.sampleRate});

nodejs-addon-examples/test_vad_asr_non_streaming_whisper_microphone.js

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,11 @@ ai.on('data', data => {
9393
console.log(`${index}: ${text}`);
9494

9595
const filename = `${index}-${text}-${
96-
new Date()
97-
.toLocaleTimeString('en-US', {hour12: false})
98-
.split(' ')[0]}.wav`;
96+
new Date()
97+
.toLocaleTimeString('en-US', {hour12: false})
98+
.split(' ')[0]}.wav`
99+
.replace(/:/g, '-');
100+
99101
sherpa_onnx.writeWave(
100102
filename,
101103
{samples: segment.samples, sampleRate: vad.config.sampleRate});

0 commit comments

Comments
 (0)