model stringlengths 12 49 | RTFx float64 -1 6.4k | License stringclasses 7
values | Size (B) float64 -1 24 ⌀ | # Languages float64 1 1.68k ⌀ | Encoder stringclasses 6
values | Decoder stringclasses 14
values | AMI WER float64 8.03 86.8 | Earnings22 WER float64 8.37 51.9 | Gigaspeech WER float64 8.5 42.4 | LS Clean WER float64 1.25 22.1 | LS Other WER float64 2.37 28.7 | SPGISpeech WER float64 1.59 27.6 | Voxpopuli WER float64 5.37 32.5 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
abr-ai/asr-19m-v2-en-32b | 2,862.63 | applied-brain-research-open-license | 0.02 | null | null | null | 18.76 | 13.53 | 15.44 | 4.66 | 11.16 | 3.94 | 9.88 |
aquavoice/avalon-v1-en | -1 | Proprietary | null | null | null | null | 11.58 | 11.37 | 9.52 | 1.68 | 3.28 | 2.1 | 7.33 |
assemblyai/universal-3-pro | -1 | Proprietary | null | 99 | null | null | 13.85 | 10.9 | 9.14 | 1.53 | 2.7 | 1.68 | 6.83 |
bosonai/higgs-audio-v3-8b-stt-v2 | 75.66 | apache-2.0 | 8.91 | 1 | Whisper | Qwen3 | 10.07 | 8.79 | 8.5 | 1.27 | 2.43 | 3.6 | 6.01 |
distil-whisper/distil-large-v2 | 202.946441 | mit | 0.8 | null | Whisper | Transformer | 14.67 | 12.19 | 10.32 | 2.94 | 6.84 | 3.3 | 8.24 |
distil-whisper/distil-large-v3 | 214.421431 | mit | 0.8 | null | Whisper | Transformer | 15.16 | 11.79 | 10.08 | 2.54 | 5.19 | 3.27 | 8.25 |
distil-whisper/distil-large-v3.5 | 202.03 | mit | 0.8 | 1 | Whisper | Transformer | 14.63 | 11.29 | 9.84 | 2.37 | 5.04 | 2.87 | 8.04 |
distil-whisper/distil-medium.en | 279.733104 | mit | 0.4 | null | Whisper | Transformer | 16.12 | 12.99 | 11.3 | 3.69 | 8.35 | 3.83 | 9 |
distil-whisper/distil-small.en | 331.893486 | mit | 0.2 | null | Whisper | Transformer | 16.16 | 13.15 | 10.87 | 3.48 | 7.73 | 3.82 | 8.79 |
efficient-speech/lite-whisper-large-v3 | 115.83 | apache-2.0 | 1 | null | Whisper | Transformer | 16.9 | 11.55 | 10.26 | 2.1 | 4.4 | 2.85 | 7.67 |
efficient-speech/lite-whisper-large-v3-acc | 117.8 | apache-2.0 | 1 | 99 | Whisper | Transformer | 16.1 | 11.04 | 10.1 | 2 | 3.91 | 2.89 | 8.11 |
efficient-speech/lite-whisper-large-v3-fast | 120.76 | apache-2.0 | 1 | null | Whisper | Transformer | 19.87 | 12.48 | 10.66 | 2.25 | 5.19 | 3.15 | 7.79 |
efficient-speech/lite-whisper-large-v3-turbo-acc | 191.71 | apache-2.0 | 0.7 | null | Whisper | Transformer | 16.97 | 11.53 | 10.21 | 2.15 | 4.6 | 2.93 | 8.17 |
elevenlabs/scribe_v1 | -1 | Proprietary | null | 99 | null | null | 14.43 | 12.14 | 9.66 | 1.79 | 3.31 | 3.3 | 7.2 |
elevenlabs/scribe_v2 | -1 | Proprietary | null | 90 | null | null | 11.86 | 9.43 | 9.11 | 1.54 | 2.83 | 2.68 | 6.8 |
espnet/owsm_ctc_v3.1_1B | 502.02 | cc-by-4.0 | 1.1 | null | Whisper | CTC | 15.61 | 13.74 | 11.88 | 2.35 | 5.15 | 2.87 | 8.4 |
espnet/owsm_ctc_v3.2_ft_1B | 339.4 | cc-by-4.0 | 1 | null | Whisper | CTC | 16.64 | 13.65 | 11.79 | 2.63 | 5.29 | 2.73 | 8.02 |
espnet/owsm_ctc_v4_1B | 453.97 | cc-by-4.0 | 1 | 75 | Whisper | CTC | 13.1 | 13.66 | 10.83 | 2.59 | 4.89 | 2.55 | 7.35 |
facebook/data2vec-audio-base-960h | 648.138532 | apache-2.0 | 0.09 | null | Self-supervised | CTC | 47.27 | 49.56 | 29.78 | 12.13 | 15.48 | 25.46 | 27.25 |
facebook/data2vec-audio-large-960h | 470.154204 | apache-2.0 | 0.31 | null | Self-supervised | CTC | 40.51 | 37.82 | 24.8 | 11.4 | 12.94 | 18.49 | 23.86 |
facebook/hubert-large-ls960-ft | 495.862704 | apache-2.0 | 0.32 | null | Self-supervised | CTC | 39.72 | 35.24 | 25.01 | 11.35 | 12.75 | 18.86 | 22.7 |
facebook/hubert-xlarge-ls960-ft | 361.317654 | apache-2.0 | 0.96 | null | Self-supervised | CTC | 39.11 | 36.13 | 24.74 | 11.3 | 12.22 | 18.58 | 22.47 |
facebook/mms-1b-all | 230.794251 | cc-by-nc-4.0 | 0.96 | 1,100 | Self-supervised | CTC | 42.02 | 31.17 | 26.44 | 12.63 | 15.99 | 16.95 | 17.63 |
facebook/mms-1b-fl102 | 234.423174 | cc-by-nc-4.0 | 0.96 | 1,100 | Self-supervised | CTC | 86.78 | 51.87 | 42.42 | 22.07 | 28.7 | 26.21 | 27.97 |
facebook/omniASR-CTC-7B-v2 | 267.43 | apache-2.0 | 6.5 | 1,676 | Self-supervised | CTC | 23.86 | 17.11 | 15.37 | 2.56 | 5.6 | 6.07 | 8.39 |
facebook/omniASR-LLM-7B-v2 | 65.97 | apache-2.0 | 7.8 | 1,676 | Self-supervised | Transformer | 20.73 | 13.48 | 11.15 | 1.66 | 3.64 | 3.65 | 6.69 |
facebook/wav2vec2-base-960h | 686.002907 | apache-2.0 | 0.09 | null | Self-supervised | CTC | 45.56 | 48.47 | 30.85 | 12.53 | 16.72 | 27.56 | 32.48 |
facebook/wav2vec2-conformer-rel-pos-large-960h-ft | 522.456837 | apache-2.0 | 0.62 | null | Self-supervised, Conformer-based | CTC | 42.39 | 38.33 | 24.96 | 11.2 | 12.44 | 18.85 | 22.39 |
facebook/wav2vec2-conformer-rope-large-960h-ft | 607.869462 | apache-2.0 | 0.6 | null | Self-supervised, Conformer-based | CTC | 42.47 | 37.52 | 25 | 11.34 | 12.54 | 18.87 | 22.61 |
facebook/wav2vec2-large-960h | 516.579659 | apache-2.0 | 0.32 | null | Self-supervised | CTC | 42.66 | 43.75 | 27.74 | 12.81 | 15.46 | 22.82 | 30.09 |
facebook/wav2vec2-large-960h-lv60-self | 509.320417 | apache-2.0 | 0.32 | null | Self-supervised | CTC | 36.77 | 31.68 | 23.94 | 11.13 | 12.42 | 17.94 | 21.42 |
facebook/wav2vec2-large-robust-ft-libri-960h | 503.808561 | apache-2.0 | 0.3 | null | Self-supervised | CTC | 37.75 | 36.22 | 25.12 | 11.84 | 13.76 | 19.03 | 23.27 |
fixie-ai/ultravox-v0_6-llama-3_1-8b | 6.22 | mit | 0.7 | 8 | Whisper | Llama 3? | 21.53 | 15.35 | 9.82 | 2.11 | 4.35 | 3.63 | 8.24 |
google/chirp | -1 | Proprietary | 2 | 100 | null | null | 15.9 | 13.25 | 10.31 | 3.53 | 6.38 | 4.23 | 8.92 |
google/chirp_2 | -1 | Proprietary | null | 468 | null | null | 8.91 | 12.67 | 9.8 | 2.29 | 5.06 | 2.4 | 7.03 |
google/chirp_3 | -1 | Proprietary | null | 896 | null | null | 11.8 | 10.58 | 9.67 | 2.04 | 4.58 | 3.98 | 7.52 |
ibm-granite/granite-speech-3.3-2b | 270.57 | apache-2.0 | 3 | 5 | Conformer-based | Granite | 8.9 | 10.25 | 10.69 | 1.53 | 3.26 | 3.87 | 5.93 |
ibm-granite/granite-speech-3.3-8b | 145.42 | apache-2.0 | 9 | 5 | Conformer-based | Granite | 8.98 | 9.42 | 10.19 | 1.43 | 2.86 | 3.91 | 5.72 |
ibm-granite/granite-4.0-1b-speech | 280.02 | apache-2.0 | 2 | 6 | Conformer-based | Granite | 8.44 | 8.48 | 10.14 | 1.42 | 2.85 | 3.89 | 5.84 |
ibm-granite/granite-speech-4.1-2b | 231.29 | apache-2.0 | 2 | 6 | Conformer-based | Granite | 8.09 | 8.37 | 9.8 | 1.33 | 2.5 | 3.78 | 5.7 |
ibm-granite/granite-speech-4.1-2b-nar | 879.12 | apache-2.0 | 2 | 5 | Conformer-based | Hybrid CTC-LLM | 8.03 | 8.44 | 10.16 | 1.28 | 2.77 | 3.33 | 5.86 |
kyutai/stt-2.6b-en | 88.37 | cc-by-4.0 | 2.6 | null | Mimi tokenizer | Transformer | 12.17 | 10.99 | 9.81 | 1.7 | 4.32 | 2.03 | 6.79 |
microsoft/Phi-4-multimodal-instruct | 151.1 | mit | 6 | 8 | Conformer-based | Phi-4-Mini-Instruct | 11.09 | 10.16 | 9.33 | 1.69 | 3.82 | 3.06 | 6.04 |
microsoft/VibeVoice-ASR-HF | 51.8 | mit | 8 | 50 | Custom | Qwen2 | 17.2 | 13.17 | 9.67 | 2.2 | 5.51 | 3.8 | 8.01 |
mistralai/Voxtral-Mini-3B-2507 | 109.86 | apache-2.0 | 5 | 8 | Whisper | Ministral 3B | 16.3 | 10.69 | 10.24 | 1.88 | 4.1 | 2.37 | 7.14 |
mistralai/Voxtral-Mini-4B-Realtime-2602 | 93.32 | apache-2.0 | 4 | 13 | Custom | Transformer | 17.07 | 11.84 | 10.38 | 2.08 | 5.52 | 2.42 | 8.34 |
mistralai/Voxtral-Small-24B-2507 | 54.09 | apache-2.0 | 24 | 8 | Whisper | Ministral 24B | 15.27 | 10.5 | 9.81 | 1.59 | 3.26 | 2.02 | 6.96 |
nvidia/canary-180m-flash | 1,233.58 | cc-by-4.0 | 0.18 | 4 | Conformer-based | Transformer | 14.86 | 12.33 | 10.51 | 1.73 | 4.35 | 2.26 | 6.35 |
nvidia/canary-1b | 235.343279 | cc-by-nc-4.0 | 1 | 4 | Conformer-based | Transformer | 13.9 | 12.19 | 10.12 | 1.48 | 2.93 | 2.06 | 5.79 |
nvidia/canary-1b-flash | 1,045.75 | cc-by-4.0 | 1 | 4 | Conformer-based | Transformer | 13.11 | 12.77 | 9.85 | 1.48 | 2.87 | 1.95 | 5.63 |
nvidia/canary-1b-v2 | 749 | cc-by-4.0 | 1 | 25 | Conformer-based | Transformer | 16.01 | 11.79 | 10.82 | 2.18 | 3.56 | 2.28 | 6.25 |
nvidia/canary-qwen-2.5b | 418.28 | cc-by-4.0 | 2.5 | null | Conformer-based | Qwen2 | 10.19 | 10.45 | 9.43 | 1.61 | 3.1 | 1.9 | 5.66 |
nvidia/parakeet-ctc-0.6b | 4,281.529811 | cc-by-4.0 | 0.6 | null | Conformer-based | CTC | 16.46 | 14.26 | 10.39 | 1.88 | 3.8 | 3.89 | 7.07 |
nvidia/parakeet-ctc-1.1b | 2,728.522295 | cc-by-4.0 | 1.1 | null | Conformer-based | CTC | 15.67 | 13.75 | 10.28 | 1.83 | 3.51 | 4.02 | 6.56 |
nvidia/parakeet-rnnt-0.6b | 2,815.724575 | cc-by-4.0 | 0.6 | null | Conformer-based | TDT / RNN-T | 17.4 | 14.66 | 10.01 | 1.62 | 3.02 | 3.32 | 6.08 |
nvidia/parakeet-rnnt-1.1b | 2,053.14697 | cc-by-4.0 | 1.1 | null | Conformer-based | TDT / RNN-T | 17.01 | 13.94 | 9.89 | 1.45 | 2.5 | 2.93 | 5.44 |
nvidia/parakeet-tdt_ctc-110m | 5,345.14 | cc-by-4.0 | 0.11 | null | Conformer-based | CTC, TDT / RNN-T | 15.89 | 12.37 | 10.52 | 2.4 | 5.22 | 2.54 | 6.9 |
nvidia/parakeet-tdt-0.6b-v2 | 3,386.02 | cc-by-4.0 | 0.6 | null | Conformer-based | TDT / RNN-T | 11.16 | 11.15 | 9.74 | 1.69 | 3.19 | 2.17 | 5.95 |
nvidia/parakeet-tdt-0.6b-v3 | 3,332.74 | cc-by-4.0 | 0.6 | 26 | Conformer-based | TDT / RNN-T | 11.39 | 11.19 | 9.57 | 1.92 | 3.59 | 3.98 | 6.09 |
nvidia/parakeet-tdt-1.1b | 2,390.611822 | cc-by-nc-4.0 | 1.1 | null | Conformer-based | TDT / RNN-T | 15.87 | 14.49 | 9.52 | 1.4 | 2.6 | 3.16 | 5.49 |
nvidia/stt_en_conformer_ctc_large | 4,295.006653 | cc-by-4.0 | 0.12 | null | Conformer-based | CTC | 15.95 | 15.99 | 11.6 | 2.05 | 4.15 | 5.57 | 6.83 |
nvidia/stt_en_conformer_ctc_small | 5,686.896503 | cc-by-4.0 | 0.01 | null | Conformer-based | CTC | 20.43 | 18.84 | 14.46 | 3.59 | 7.92 | 7.8 | 9.07 |
nvidia/stt_en_conformer_transducer_small | 3,714.361304 | Open | 0.014 | null | Conformer-based | TDT / RNN-T | 20.28 | 18.13 | 13.7 | 2.77 | 6.47 | 6.63 | 7.87 |
nvidia/stt_en_fastconformer_ctc_large | 6,399.25031 | cc-by-4.0 | 0.12 | null | Conformer-based | CTC | 18.61 | 18.81 | 12.17 | 1.93 | 4.04 | 5.06 | 6.34 |
nvidia/stt_en_fastconformer_transducer_large | 4,097.432343 | cc-by-4.0 | 0.11 | null | Conformer-based | TDT / RNN-T | 19.09 | 19.41 | 12.31 | 1.8 | 3.97 | 4.97 | 6.45 |
nyrahealth/CrisperWhisper | 84.05 | cc-by-nc-4.0 | 2 | 1 | Whisper | Transformer | 8.71 | 12.89 | 10.24 | 1.82 | 4 | 2.7 | 9.82 |
openai/whisper-base.en | 320.673885 | apache-2.0 | 0.07 | null | Whisper | Transformer | 21.13 | 15.09 | 12.83 | 4.25 | 10.35 | 4.26 | 9.76 |
openai/whisper-large | 143.756319 | apache-2.0 | 2 | 99 | Whisper | Transformer | 16.73 | 12.91 | 10.76 | 2.73 | 5.54 | 3.2 | 7.76 |
openai/whisper-large-v2 | 144.452102 | apache-2.0 | 2 | null | Whisper | Transformer | 16.74 | 12.05 | 10.67 | 2.83 | 5.14 | 3.87 | 7.48 |
openai/whisper-large-v3 | 145.509655 | apache-2.0 | 2 | 99 | Whisper | Transformer | 15.95 | 11.29 | 10.02 | 2.01 | 3.91 | 2.94 | 9.54 |
openai/whisper-large-v3-turbo | 200.19 | mit | 0.8 | null | Whisper | Transformer | 16.13 | 11.63 | 10.14 | 2.1 | 4.24 | 2.97 | 11.87 |
openai/whisper-medium.en | 182.12916 | apache-2.0 | 0.8 | null | Whisper | Transformer | 16.68 | 12.63 | 11.03 | 3.02 | 5.85 | 3.33 | 8.06 |
openai/whisper-small.en | 268.914874 | apache-2.0 | 0.2 | null | Whisper | Transformer | 17.93 | 12.97 | 11.35 | 3.05 | 7.25 | 3.6 | 8.5 |
openai/whisper-tiny.en | 348.123935 | apache-2.0 | 0.04 | null | Whisper | Transformer | 24.24 | 19.12 | 14.08 | 5.66 | 15.45 | 5.93 | 12 |
Qwen/Qwen3-ASR-0.6B | 166.23 | apache-2.0 | 0.6 | 52 | Custom | Qwen3 | 11.66 | 11.06 | 9.14 | 2.13 | 4.45 | 3.03 | 7.07 |
Qwen/Qwen3-ASR-1.7B | 147.93 | apache-2.0 | 1.7 | 52 | Custom | Qwen3 | 10.56 | 10.25 | 8.74 | 1.63 | 3.4 | 2.84 | 6.35 |
revai/fusion | -1 | Proprietary | null | null | null | null | 10.93 | 12.09 | 9.41 | 2.88 | 6.23 | 4.05 | 8.53 |
revai/machine | -1 | Proprietary | null | null | null | null | 10.99 | 12.69 | 9.78 | 4.44 | 8.86 | 4.17 | 9.57 |
speechbrain/asr-conformer-largescaleasr | 42.16 | apache-2.0 | 0.48 | null | Conformer-based | Transformer, CTC | 19.6 | 14.71 | 11.36 | 1.95 | 4.24 | 4.11 | 6.89 |
speechbrain/asr-wav2vec2-librispeech | 451.181976 | apache-2.0 | 0.32 | null | Self-supervised | CTC | 32.05 | 28.52 | 16.92 | 1.77 | 3.83 | 10.39 | 13.72 |
speechmatics/enhanced | -1 | Proprietary | null | 55 | null | null | 14.47 | 12.26 | 9.5 | 2.1 | 4.6 | 3.06 | 6.71 |
smallestai/pulse | -1 | Proprietary | -1 | null | null | null | 11.18 | 11.09 | 9.83 | 1.84 | 3.32 | 2.23 | 6.05 |
usefulsensors/moonshine-base | 565.97 | mit | 0.06 | null | Custom | Transformer | 17.49 | 16.85 | 12.08 | 3.38 | 8.15 | 5.46 | 10.84 |
usefulsensors/moonshine-streaming-medium | 448.15 | mit | 0.24 | null | Custom | Transformer | 10.68 | 11.92 | 9.46 | 2.09 | 5 | 2.58 | 8.54 |
usefulsensors/moonshine-streaming-small | 566.33 | mit | 0.12 | null | Custom | Transformer | 12.53 | 13.55 | 10.4 | 2.49 | 6.78 | 3.19 | 9.98 |
usefulsensors/moonshine-streaming-tiny | 847.2 | mit | 0.03 | null | Custom | Transformer | 19.02 | 20.19 | 13.9 | 4.5 | 12.1 | 6.16 | 14.02 |
usefulsensors/moonshine-tiny | 753.06 | mit | 0.03 | null | Custom | Transformer | 22.84 | 20.73 | 14.21 | 4.55 | 11.68 | 7.43 | 14.11 |
zai-org/GLM-ASR-Nano-2512 | 145.28 | mit | 2 | 17 | Whisper | Llama | 16.15 | 11.08 | 9.73 | 2.15 | 4.42 | 2.08 | 7.54 |
zoom/scribe_v1 | -1 | Proprietary | null | 1 | null | null | 10.03 | 9.53 | 9.61 | 1.63 | 2.81 | 1.59 | 5.37 |
CohereLabs/cohere-transcribe-03-2026 | 524.88 | apache-2.0 | 2 | 14 | Conformer-based | Transformer | 8.13 | 10.86 | 9.34 | 1.25 | 2.37 | 3.08 | 5.87 |
No dataset card yet
- Downloads last month
- 181