@@ -49,16 +49,62 @@ class SileroVadModelConfig {
49
49
final double maxSpeechDuration;
50
50
}
51
51
52
/// Immutable settings for the TEN VAD model.
///
/// Mirrors the shape of the sibling Silero VAD config: a model path plus
/// detection tuning values. Instances can be round-tripped through
/// [toJson] and [TenVadModelConfig.fromJson].
class TenVadModelConfig {
  /// Creates a config; every argument is optional and falls back to the
  /// default shown in the parameter list.
  const TenVadModelConfig({
    this.model = '',
    this.threshold = 0.5,
    this.minSilenceDuration = 0.5,
    this.minSpeechDuration = 0.25,
    this.windowSize = 256,
    this.maxSpeechDuration = 5.0,
  });

  /// Builds a config from a decoded JSON map.
  ///
  /// Missing or `null` entries fall back to the constructor defaults.
  /// Numeric entries are read as [num], so a JSON integer is accepted for a
  /// double field and a whole-number double such as `256.0` is accepted for
  /// [windowSize] (a fractional value is truncated by [num.toInt]).
  factory TenVadModelConfig.fromJson(Map<String, dynamic> json) {
    return TenVadModelConfig(
      model: json['model'] as String? ?? '',
      threshold: (json['threshold'] as num?)?.toDouble() ?? 0.5,
      minSilenceDuration:
          (json['minSilenceDuration'] as num?)?.toDouble() ?? 0.5,
      minSpeechDuration:
          (json['minSpeechDuration'] as num?)?.toDouble() ?? 0.25,
      // Go through `num` like the double fields do: a direct `as int?` cast
      // throws when the decoder hands back a double for this entry.
      windowSize: (json['windowSize'] as num?)?.toInt() ?? 256,
      maxSpeechDuration:
          (json['maxSpeechDuration'] as num?)?.toDouble() ?? 5.0,
    );
  }

  @override
  String toString() {
    return 'TenVadModelConfig(model: $model, threshold: $threshold, minSilenceDuration: $minSilenceDuration, minSpeechDuration: $minSpeechDuration, windowSize: $windowSize, maxSpeechDuration: $maxSpeechDuration)';
  }

  /// Returns a JSON-encodable map using the same keys that
  /// [TenVadModelConfig.fromJson] reads.
  Map<String, dynamic> toJson() => {
        'model': model,
        'threshold': threshold,
        'minSilenceDuration': minSilenceDuration,
        'minSpeechDuration': minSpeechDuration,
        'windowSize': windowSize,
        'maxSpeechDuration': maxSpeechDuration,
      };

  /// Path to the model file; empty by default.
  final String model;

  /// Detection threshold passed through to the native VAD config.
  final double threshold;

  /// Minimum silence duration (presumably seconds; confirm against the
  /// native API).
  final double minSilenceDuration;

  /// Minimum speech duration (presumably seconds; confirm against the
  /// native API).
  final double minSpeechDuration;

  /// Analysis window size (presumably in samples; confirm against the
  /// native API).
  final int windowSize;

  /// Maximum speech duration (presumably seconds; confirm against the
  /// native API).
  final double maxSpeechDuration;
}

95
+
52
96
class VadModelConfig {
53
97
VadModelConfig ({
54
98
this .sileroVad = const SileroVadModelConfig (),
55
99
this .sampleRate = 16000 ,
56
100
this .numThreads = 1 ,
57
101
this .provider = 'cpu' ,
58
102
this .debug = true ,
103
+ this .tenVad = const TenVadModelConfig (),
59
104
});
60
105
61
106
final SileroVadModelConfig sileroVad;
107
+ final TenVadModelConfig tenVad;
62
108
final int sampleRate;
63
109
final int numThreads;
64
110
final String provider;
@@ -68,6 +114,8 @@ class VadModelConfig {
68
114
return VadModelConfig (
69
115
sileroVad: SileroVadModelConfig .fromJson (
70
116
json['sileroVad' ] as Map <String , dynamic >? ?? const {}),
117
+ tenVad: TenVadModelConfig .fromJson (
118
+ json['tenVad' ] as Map <String , dynamic >? ?? const {}),
71
119
sampleRate: json['sampleRate' ] as int ? ?? 16000 ,
72
120
numThreads: json['numThreads' ] as int ? ?? 1 ,
73
121
provider: json['provider' ] as String ? ?? 'cpu' ,
@@ -77,6 +125,7 @@ class VadModelConfig {
77
125
78
126
Map <String , dynamic > toJson () => {
79
127
'sileroVad' : sileroVad.toJson (),
128
+ 'tenVad' : tenVad.toJson (),
80
129
'sampleRate' : sampleRate,
81
130
'numThreads' : numThreads,
82
131
'provider' : provider,
@@ -85,7 +134,7 @@ class VadModelConfig {
85
134
86
135
@override
87
136
String toString () {
88
- return 'VadModelConfig(sileroVad: $sileroVad , sampleRate: $sampleRate , numThreads: $numThreads , provider: $provider , debug: $debug )' ;
137
+ return 'VadModelConfig(sileroVad: $sileroVad , tenVad: $ tenVad , sampleRate: $sampleRate , numThreads: $numThreads , provider: $provider , debug: $debug )' ;
89
138
}
90
139
}
91
140
@@ -168,15 +217,24 @@ class VoiceActivityDetector {
168
217
{required VadModelConfig config, required double bufferSizeInSeconds}) {
169
218
final c = calloc <SherpaOnnxVadModelConfig >();
170
219
171
- final modelPtr = config.sileroVad.model.toNativeUtf8 ();
172
- c.ref.sileroVad.model = modelPtr ;
220
+ final sileroVadModelPtr = config.sileroVad.model.toNativeUtf8 ();
221
+ c.ref.sileroVad.model = sileroVadModelPtr ;
173
222
174
223
c.ref.sileroVad.threshold = config.sileroVad.threshold;
175
224
c.ref.sileroVad.minSilenceDuration = config.sileroVad.minSilenceDuration;
176
225
c.ref.sileroVad.minSpeechDuration = config.sileroVad.minSpeechDuration;
177
226
c.ref.sileroVad.windowSize = config.sileroVad.windowSize;
178
227
c.ref.sileroVad.maxSpeechDuration = config.sileroVad.maxSpeechDuration;
179
228
229
+ final tenVadModelPtr = config.tenVad.model.toNativeUtf8 ();
230
+ c.ref.tenVad.model = tenVadModelPtr;
231
+
232
+ c.ref.tenVad.threshold = config.tenVad.threshold;
233
+ c.ref.tenVad.minSilenceDuration = config.tenVad.minSilenceDuration;
234
+ c.ref.tenVad.minSpeechDuration = config.tenVad.minSpeechDuration;
235
+ c.ref.tenVad.windowSize = config.tenVad.windowSize;
236
+ c.ref.tenVad.maxSpeechDuration = config.tenVad.maxSpeechDuration;
237
+
180
238
c.ref.sampleRate = config.sampleRate;
181
239
c.ref.numThreads = config.numThreads;
182
240
@@ -190,7 +248,8 @@ class VoiceActivityDetector {
190
248
nullptr;
191
249
192
250
calloc.free (providerPtr);
193
- calloc.free (modelPtr);
251
+ calloc.free (tenVadModelPtr);
252
+ calloc.free (sileroVadModelPtr);
194
253
calloc.free (c);
195
254
196
255
return VoiceActivityDetector ._(ptr: ptr, config: config);
0 commit comments