
Commit f096034

Add LODR support to online and offline recognizers (#2026)
This PR integrates LODR (Low-Order Density Ratio) support from Icefall into both online and offline recognizers, enabling LODR for LM shallow fusion and LM rescoring.

- Extended OnlineLMConfig and OfflineLMConfig to include lodr_fst, lodr_scale, and lodr_backoff_id.
- Implemented LodrFst and LodrStateCost classes and wired them into RNN LM scoring in both online and offline code paths.
- Updated Python bindings, CLI entry points, examples, and CI test scripts to accept and exercise the new LODR options.
1 parent 6122a67 commit f096034

21 files changed: +613 -14 lines changed
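
The new options are exposed end to end: as --lodr-fst / --lodr-scale flags on the C++ binaries and the Python CLI examples, and as lodr_fst / lodr_scale keyword arguments in the Python API (see the diffs below). The following is a minimal sketch, not part of this commit, of how the offline recognizer can be created with RNN-LM rescoring plus LODR; model file names and scale values are placeholders borrowed from the CI test below.

#!/usr/bin/env python3
# Sketch only: offline decoding with RNN-LM rescoring + LODR.
# Paths are placeholders; replace them with real model files.
import numpy as np
import sherpa_onnx

recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
    tokens="tokens.txt",
    encoder="encoder-epoch-99-avg-1.onnx",
    decoder="decoder-epoch-99-avg-1.onnx",
    joiner="joiner-epoch-99-avg-1.onnx",
    num_threads=2,
    sample_rate=16000,
    feature_dim=80,
    decoding_method="modified_beam_search",  # LM rescoring/LODR apply only with beam search
    lm="no-state-epoch-99-avg-1.onnx",       # RNN LM exported to ONNX
    lm_scale=0.1,
    lodr_fst="2gram.fst",                    # low-order (bi-gram) backoff LM as an FST
    lodr_scale=-0.5,                         # negative: the bi-gram score is subtracted
)

# samples: 1-D float32 array in [-1, 1] at 16 kHz; zeros stand in for real audio here
samples = np.zeros(16000, dtype=np.float32)
stream = recognizer.create_stream()
stream.accept_waveform(16000, samples)
recognizer.decode_stream(stream)
print(stream.result.text)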

.github/scripts/test-offline-transducer.sh

Lines changed: 33 additions & 1 deletion
@@ -281,7 +281,39 @@ time $EXE \
   $repo/test_wavs/1.wav \
   $repo/test_wavs/8k.wav
 
-rm -rf $repo
+lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
+log "Download pre-trained RNN-LM model from ${lm_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
+lm_repo=$(basename $lm_repo_url)
+pushd $lm_repo
+git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
+popd
+
+bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
+log "Download bi-gram LM from ${bigram_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
+bigramlm_repo=$(basename $bigram_repo_url)
+pushd $bigramlm_repo
+git lfs pull --include "2gram.fst"
+popd
+
+log "Start testing with LM and bi-gram LODR"
+# TODO: find test examples that change with the LODR
+time $EXE \
+  --tokens=$repo/tokens.txt \
+  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
+  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
+  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
+  --num-threads=2 \
+  --decoding_method="modified_beam_search" \
+  --lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
+  --lodr-fst=$bigramlm_repo/2gram.fst \
+  --lodr-scale=-0.5 \
+  $repo/test_wavs/0.wav \
+  $repo/test_wavs/1.wav \
+  $repo/test_wavs/8k.wav
+
+rm -rf $repo $lm_repo $bigramlm_repo
 
 log "------------------------------------------------------------"
 log "Run Paraformer (Chinese)"

.github/scripts/test-online-transducer.sh

Lines changed: 54 additions & 1 deletion
@@ -174,7 +174,60 @@ for wave in ${waves[@]}; do
     $wave
 done
 
-rm -rf $repo
+lm_repo_url=https://huggingface.co/vsd-vector/icefall-librispeech-rnn-lm
+log "Download pre-trained RNN-LM model from ${lm_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
+lm_repo=$(basename $lm_repo_url)
+pushd $lm_repo
+git lfs pull --include "with-state-epoch-99-avg-1.onnx"
+popd
+
+bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
+log "Download bi-gram LM from ${bigram_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
+bigramlm_repo=$(basename $bigram_repo_url)
+pushd $bigramlm_repo
+git lfs pull --include "2gram.fst"
+popd
+
+log "Start testing LODR"
+
+waves=(
+  $repo/test_wavs/0.wav
+  $repo/test_wavs/1.wav
+  $repo/test_wavs/8k.wav
+)
+
+for wave in ${waves[@]}; do
+  time $EXE \
+    --tokens=$repo/tokens.txt \
+    --encoder=$repo/encoder-epoch-99-avg-1.onnx \
+    --decoder=$repo/decoder-epoch-99-avg-1.onnx \
+    --joiner=$repo/joiner-epoch-99-avg-1.onnx \
+    --num-threads=2 \
+    --decoding_method="modified_beam_search" \
+    --lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
+    --lodr-fst=$bigramlm_repo/2gram.fst \
+    --lodr-scale=-0.5 \
+    $wave
+done
+
+for wave in ${waves[@]}; do
+  time $EXE \
+    --tokens=$repo/tokens.txt \
+    --encoder=$repo/encoder-epoch-99-avg-1.onnx \
+    --decoder=$repo/decoder-epoch-99-avg-1.onnx \
+    --joiner=$repo/joiner-epoch-99-avg-1.onnx \
+    --num-threads=2 \
+    --decoding_method="modified_beam_search" \
+    --lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
+    --lodr-fst=$bigramlm_repo/2gram.fst \
+    --lodr-scale=-0.5 \
+    --lm-shallow-fusion=true \
+    $wave
+done
+
+rm -rf $repo $bigramlm_repo $lm_repo
 
 log "------------------------------------------------------------"
 log "Run streaming Zipformer transducer (Bilingual, Chinese + English)"

.github/scripts/test-python.sh

Lines changed: 31 additions & 1 deletion
@@ -562,9 +562,39 @@ python3 ./python-api-examples/offline-decode-files.py \
   $repo/test_wavs/1.wav \
   $repo/test_wavs/8k.wav
 
+lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
+log "Download pre-trained RNN-LM model from ${lm_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
+lm_repo=$(basename $lm_repo_url)
+pushd $lm_repo
+git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
+popd
+
+bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
+log "Download bi-gram LM from ${bigram_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
+bigramlm_repo=$(basename $bigram_repo_url)
+pushd $bigramlm_repo
+git lfs pull --include "2gram.fst"
+popd
+
+log "Perform offline decoding with RNN-LM and LODR"
+python3 ./python-api-examples/offline-decode-files.py \
+  --tokens=$repo/tokens.txt \
+  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
+  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
+  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
+  --decoding-method=modified_beam_search \
+  --lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
+  --lodr-fst=$bigramlm_repo/2gram.fst \
+  --lodr-scale=-0.5 \
+  $repo/test_wavs/0.wav \
+  $repo/test_wavs/1.wav \
+  $repo/test_wavs/8k.wav
+
 python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose
 
-rm -rf $repo
+rm -rf $repo $lm_repo $bigramlm_repo
 
 log "Test non-streaming paraformer models"

python-api-examples/offline-decode-files.py

Lines changed: 56 additions & 0 deletions
@@ -35,6 +35,25 @@
   /path/to/0.wav \
   /path/to/1.wav
 
+also with RNN LM rescoring and LODR (optional):
+
+./python-api-examples/offline-decode-files.py \
+  --tokens=/path/to/tokens.txt \
+  --encoder=/path/to/encoder.onnx \
+  --decoder=/path/to/decoder.onnx \
+  --joiner=/path/to/joiner.onnx \
+  --num-threads=2 \
+  --decoding-method=modified_beam_search \
+  --debug=false \
+  --sample-rate=16000 \
+  --feature-dim=80 \
+  --lm=/path/to/lm.onnx \
+  --lm-scale=0.1 \
+  --lodr-fst=/path/to/lodr.fst \
+  --lodr-scale=-0.1 \
+  /path/to/0.wav \
+  /path/to/1.wav
+
 (3) For CTC models from NeMo
 
 python3 ./python-api-examples/offline-decode-files.py \

@@ -269,6 +288,39 @@ def get_args():
         default="greedy_search",
         help="Valid values are greedy_search and modified_beam_search",
     )
+
+    parser.add_argument(
+        "--lm",
+        metavar="file",
+        type=str,
+        default="",
+        help="Path to RNN LM model",
+    )
+
+    parser.add_argument(
+        "--lm-scale",
+        metavar="lm_scale",
+        type=float,
+        default=0.1,
+        help="LM model scale for rescoring",
+    )
+
+    parser.add_argument(
+        "--lodr-fst",
+        metavar="file",
+        type=str,
+        default="",
+        help="Path to LODR FST model. Used only when --lm is given.",
+    )
+
+    parser.add_argument(
+        "--lodr-scale",
+        metavar="lodr_scale",
+        type=float,
+        default=-0.1,
+        help="LODR scale for rescoring. Used only when --lodr-fst is given.",
+    )
+
     parser.add_argument(
         "--debug",
         type=bool,

@@ -364,6 +416,10 @@ def main():
         num_threads=args.num_threads,
         sample_rate=args.sample_rate,
         feature_dim=args.feature_dim,
+        lm=args.lm,
+        lm_scale=args.lm_scale,
+        lodr_fst=args.lodr_fst,
+        lodr_scale=args.lodr_scale,
         decoding_method=args.decoding_method,
         hotwords_file=args.hotwords_file,
         hotwords_score=args.hotwords_score,

python-api-examples/online-decode-files.py

Lines changed: 34 additions & 0 deletions
@@ -21,6 +21,22 @@
   ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/1.wav \
   ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/8k.wav
 
+or with RNN LM rescoring and LODR:
+
+./python-api-examples/online-decode-files.py \
+  --tokens=./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt \
+  --encoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-64.onnx \
+  --decoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-64.onnx \
+  --joiner=./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-64.onnx \
+  --decoding-method=modified_beam_search \
+  --lm=/path/to/lm.onnx \
+  --lm-scale=0.1 \
+  --lodr-fst=/path/to/lodr.fst \
+  --lodr-scale=-0.1 \
+  ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav \
+  ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/1.wav \
+  ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/8k.wav
+
 (2) Streaming paraformer
 
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

@@ -186,6 +202,22 @@ def get_args():
         """
     )
 
+    parser.add_argument(
+        "--lodr-fst",
+        metavar="file",
+        type=str,
+        default="",
+        help="Path to LODR FST model. Used only when --lm is given.",
+    )
+
+    parser.add_argument(
+        "--lodr-scale",
+        metavar="lodr_scale",
+        type=float,
+        default=-0.1,
+        help="LODR scale for rescoring. Used only when --lodr-fst is given.",
+    )
+
     parser.add_argument(
         "--provider",
         type=str,

@@ -320,6 +352,8 @@ def main():
         max_active_paths=args.max_active_paths,
         lm=args.lm,
         lm_scale=args.lm_scale,
+        lodr_fst=args.lodr_fst,
+        lodr_scale=args.lodr_scale,
         hotwords_file=args.hotwords_file,
         hotwords_score=args.hotwords_score,
         modeling_unit=args.modeling_unit,
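
The streaming recognizer gets the same treatment: lodr_fst and lodr_scale are forwarded to OnlineRecognizer.from_transducer, as the hunk above shows. A rough end-to-end sketch, again not part of this commit and with placeholder file names; the chunked feeding loop mirrors the existing online-decode-files.py example.

# Sketch only: streaming decoding with RNN-LM rescoring plus LODR.
import numpy as np
import sherpa_onnx

recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
    tokens="tokens.txt",
    encoder="encoder-epoch-99-avg-1-chunk-16-left-64.onnx",
    decoder="decoder-epoch-99-avg-1-chunk-16-left-64.onnx",
    joiner="joiner-epoch-99-avg-1-chunk-16-left-64.onnx",
    num_threads=2,
    decoding_method="modified_beam_search",
    lm="with-state-epoch-99-avg-1.onnx",  # stateful RNN LM, as in the CI test
    lm_scale=0.1,
    lodr_fst="2gram.fst",
    lodr_scale=-0.5,
)

samples = np.zeros(5 * 16000, dtype=np.float32)  # stand-in for real 16 kHz audio
stream = recognizer.create_stream()

chunk = 1600  # feed 0.1 s at a time, as a capture callback would
for start in range(0, len(samples), chunk):
    stream.accept_waveform(16000, samples[start : start + chunk])
    while recognizer.is_ready(stream):
        recognizer.decode_stream(stream)

# flush the final frames and drain the decoder
stream.accept_waveform(16000, np.zeros(8000, dtype=np.float32))
stream.input_finished()
while recognizer.is_ready(stream):
    recognizer.decode_stream(stream)

print(recognizer.get_result(stream))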

sherpa-onnx/csrc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ set(sources
   jieba.cc
   keyword-spotter-impl.cc
   keyword-spotter.cc
+  lodr-fst.cc
   offline-canary-model-config.cc
   offline-canary-model.cc
   offline-ctc-fst-decoder-config.cc

sherpa-onnx/csrc/hypothesis.h

Lines changed: 5 additions & 0 deletions
@@ -12,9 +12,11 @@
 #include <unordered_map>
 #include <utility>
 #include <vector>
+#include <memory>
 
 #include "onnxruntime_cxx_api.h"  // NOLINT
 #include "sherpa-onnx/csrc/context-graph.h"
+#include "sherpa-onnx/csrc/lodr-fst.h"
 #include "sherpa-onnx/csrc/math.h"
 #include "sherpa-onnx/csrc/onnx-utils.h"
 

@@ -61,6 +63,9 @@
   // the nn lm states
   std::vector<CopyableOrtValue> nn_lm_states;
 
+  // the LODR states
+  std::shared_ptr<LodrStateCost> lodr_state;
+
   const ContextState *context_state;
 
   // TODO(fangjun): Make it configurable
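
The new lodr_state member means every hypothesis in the beam remembers where it currently sits in the LODR n-gram FST, in addition to its RNN-LM state. Below is a conceptual Python sketch, not the actual C++ LodrStateCost added by this commit, of what such a per-hypothesis state looks like for a deterministic backoff bi-gram FST; the dictionary layout and names are illustrative only, and the backoff arcs correspond to the label configured via lodr_backoff_id.

# Hypothetical sketch of a per-hypothesis LODR state over a backoff bi-gram FST.
# The real logic lives in the C++ LodrFst / LodrStateCost classes (lodr-fst.cc).
class BigramLodrState:
    def __init__(self, arcs, backoffs, state=0, score=0.0):
        # arcs:     {(state, token_id): (next_state, log_prob)}
        # backoffs: {state: (backoff_state, backoff_log_prob)}, i.e. the arcs
        #           whose label is the configured lodr_backoff_id
        self.arcs = arcs
        self.backoffs = backoffs
        self.state = state
        self.score = score  # accumulated bi-gram log-probability

    def forward_one_step(self, token_id):
        # Consume one decoded token; follow backoff arcs until the token is
        # found. In a backoff bi-gram FST the unigram (start) state has an
        # arc for every token, so this loop terminates.
        state, cost = self.state, 0.0
        while (state, token_id) not in self.arcs:
            backoff_state, backoff_logp = self.backoffs[state]
            state, cost = backoff_state, cost + backoff_logp
        next_state, arc_logp = self.arcs[(state, token_id)]
        return BigramLodrState(
            self.arcs, self.backoffs, next_state, self.score + cost + arc_logp
        )

During beam search, each expanded hypothesis advances this state by the emitted token and adds lodr_scale times the score increment to its total log-probability, alongside the lm_scale-weighted RNN-LM score; lodr_scale is typically negative (for example -0.5 in the CI tests above), since LODR subtracts the low-order source-domain LM estimate in the density-ratio sense.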
