Skip to content

Commit

Permalink
Version 0.3.2
Browse files Browse the repository at this point in the history
Actual hotfix.

Version 0.3.1 got yanked because I misspecified the version of
nanobind in pyproject.toml
  • Loading branch information
galv committed Apr 3, 2023
1 parent e01c7cb commit cb1a00d
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ requires = [
"wheel",
"cmake>=3.25",
"ninja",
"nanobind@git+https://github.com/galv/nanobind#egg=fix-batch-size-1-type-cast",
"nanobind@git+https://github.com/galv/nanobind@fix-batch-size-1-type-cast",
]

build-backend = "setuptools.build_meta"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def build_extension(self, ext: setuptools.extension.Extension):
setuptools.setup(
python_requires='>=3.7',
name='riva-asrlib-decoder',
version='0.3.1',
version='0.3.2',
author='NVIDIA',
author_email='dgalvez@nvidia.com',
keywords='ASR, CUDA, WFST, Decoder',
Expand Down
27 changes: 27 additions & 0 deletions src/riva/asrlib/decoder/test_graph_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,33 @@ def test_vanilla_ctc_topo_wer_mbr(self, nemo_model_name, dataset, expected_wer,
assert my_wer <= expected_wer + ERROR_MARGIN


def test_batch_size_1(self):
    """
    Integration test for https://github.com/wjakob/nanobind/pull/162
    """
    model_name = "stt_en_conformer_ctc_small"
    ctc_dir = os.path.join(self.temp_dir, "ctc")

    # Load the acoustic model onto the GPU; its vocabulary size determines
    # the decoder's token dimension below.
    acoustic_model = nemo_asr.models.ASRModel.from_pretrained(
        model_name, map_location=torch.device("cuda")
    )

    self.create_TLG("ctc", ctc_dir, model_name)

    # Vocabulary plus one extra entry for the CTC blank symbol.
    vocabulary = acoustic_model.to_config_dict()["decoder"]["vocabulary"]
    num_tokens_including_blank = len(vocabulary) + 1

    graph_dir = os.path.join(ctc_dir, "graph/graph_ctc_3-gram.pruned.3e-7")
    decoder = BatchedMappedDecoderCuda(
        self.create_decoder_config(),
        os.path.join(graph_dir, "TLG.fst"),
        os.path.join(graph_dir, "words.txt"),
        num_tokens_including_blank,
    )

    # A batch of exactly one utterance: dummy all-ones logits on the GPU,
    # sequence lengths on the CPU, as decode_mbr expects.
    logits = torch.ones((1, 100, num_tokens_including_blank), dtype=torch.float32).cuda()
    lengths = torch.tensor([100], dtype=torch.int64).cpu()
    decoder.decode_mbr(logits.detach(), lengths.detach())

# Note that nbest decoding tends to produce a worse WER than mbr
# decoding. This is expected.
@pytest.mark.parametrize(
Expand Down

0 comments on commit cb1a00d

Please sign in to comment.