From de8b52866f6265466df6634bc7c0a45a263fbc75 Mon Sep 17 00:00:00 2001 From: Sara Papi <spapi@fbk.eu> Date: Tue, 1 Oct 2024 19:24:31 +0200 Subject: [PATCH] Correct link formatting --- README.md | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 3931e50..4a16c0c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -<img src="https://gitlab.fbk.eu/mt/mosel/-/raw/main/mosel-logo-white.png" align="center" width="100%"> +<img src="https://github.com/hlt-mt/mosel/blob/main/mosel-logo-white.png" align="center" width="100%"> # MOSEL: 950,000 Hours of Speech Data for Open-Source Speech Foundation Model Training on EU Languages @@ -17,119 +17,119 @@ </thead> <tbody> <tr> - <td>[CommonVoice](https://commonvoice.mozilla.org/en/datasets)</td> + <td><a href="https://commonvoice.mozilla.org/en/datasets">CommonVoice</a></td> <td>CC 0</td> <td>6,732</td> <td>bg, cs, da, nl, en, et, fi, fr, de, el, hu, ga, it, lv, lt, mt, pl, pt, ro, sk, sl, es, sv</td> <td>✅</td> </tr> <tr> - <td>[CoVoST2](https://github.com/facebookresearch/covost)</td> + <td><a href="https://github.com/facebookresearch/covost">CoVoST2</a></td> <td>CC 0</td> <td>687</td> <td>en, fr, it, es, pt, et, nl, sv, lv, sl</td> <td>✅</td> </tr> <tr> - <td>[CSS10](https://github.com/Kyubyong/css10)</td> + <td><a href="https://github.com/Kyubyong/css10">CSS10</a></td> <td>Public Domain</td> <td>99</td> <td>nl, fi, fr, de, el, hu, es</td> <td>✅</td> </tr> <tr> - <td>[EMU](https://ips-lmu.github.io/EMU.html)</td> + <td><a href="https://ips-lmu.github.io/EMU.html">EMU</a></td> <td>CC BY 3.0</td> <td>56</td> <td>pl</td> <td>✅</td> </tr> <tr> - <td>[EU Parliament](https://clarin-pl.eu/dspace/handle/11321/821)</td> + <td><a href="https://clarin-pl.eu/dspace/handle/11321/821">EU Parliament</a></td> <td>CC BY 4.0</td> <td>32</td> <td>pl</td> <td>✅</td> </tr> <tr> - <td>[FLEURS](https://huggingface.co/datasets/google/fleurs)</td> + <td><a 
href="https://huggingface.co/datasets/google/fleurs">FLEURS</a></td> <td>CC BY 4.0</td> <td>215</td> <td>bg, cs, da, nl, en, et, fi, fr, de, el, hu, ga, it, lv, lt, mt, pl, pt, ro, sk, sl, es, sv</td> <td>✅</td> </tr> <tr> - <td>[Large Corpus of Czech Parliament Plenary Hearings](https://lindat.cz/repository/xmlui/handle/11234/1-3126)</td> + <td><a href="https://lindat.cz/repository/xmlui/handle/11234/1-3126">Large Corpus of Czech Parliament Plenary Hearings</a></td> <td>CC BY 4.0</td> <td>444</td> <td>cs</td> <td>✅</td> </tr> <tr> - <td>[LibriLight](https://github.com/facebookresearch/libri-light)</td> + <td><a href="https://github.com/facebookresearch/libri-light">LibriLight</a></td> <td>Public Domain</td> <td>57,706</td> <td>en</td> <td>❌</td> </tr> <tr> - <td>[LibriTTS](https://www.openslr.org/60/)</td> + <td><a href="https://www.openslr.org/60/">LibriTTS</a></td> <td>CC BY 4.0</td> <td>585</td> <td>en</td> <td>✅</td> </tr> <tr> - <td>[LibriSpeech](https://www.openslr.org/12)</td> + <td><a href="https://www.openslr.org/12">LibriSpeech</a></td> <td>CC BY 4.0</td> <td>360</td> <td>en</td> <td>✅</td> </tr> <tr> - <td>[LibriVoxDeEn](https://www.cl.uni-heidelberg.de/statnlpgroup/librivoxdeen/)</td> + <td><a href="https://www.cl.uni-heidelberg.de/statnlpgroup/librivoxdeen/">LibriVoxDeEn</a></td> <td>Public Domain</td> <td>547</td> <td>de</td> <td>✅</td> </tr> <tr> - <td>[MC Speech](https://github.com/czyzi0/the-mc-speech-dataset)</td> + <td><a href="https://github.com/czyzi0/the-mc-speech-dataset">MC Speech</a></td> <td>CC 0</td> <td>22</td> <td>pl</td> <td>✅</td> </tr> <tr> - <td>[Multilingual LibriSpeech](https://www.openslr.org/94/)</td> + <td><a href="https://www.openslr.org/94/">Multilingual LibriSpeech</a></td> <td>CC BY 4.0</td> <td>50,687</td> <td>nl, en, fr, de, it, pl, pt, es</td> <td>✅</td> </tr> <tr> - <td>[SIWIS](https://datashare.ed.ac.uk/handle/10283/2353)</td> + <td><a href="https://datashare.ed.ac.uk/handle/10283/2353">SIWIS</a></td> <td>CC BY 
4.0</td> <td>11</td> <td>fr</td> <td>✅</td> </tr> <tr> - <td>[Speech Commands](http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz)</td> + <td><a href="http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz">Speech Commands</a></td> <td>CC BY 4.0</td> <td>18</td> <td>en</td> <td>✅</td> </tr> <tr> - <td>[VCTK](https://datashare.ed.ac.uk/handle/10283/3443)</td> + <td><a href="https://datashare.ed.ac.uk/handle/10283/3443">VCTK</a></td> <td>CC BY 4.0</td> <td>44</td> <td>en</td> <td>✅</td> </tr> <tr> - <td rowspan="2">[VoxPopuli](https://github.com/facebookresearch/voxpopuli)</td> + <td rowspan="2"><a href="https://github.com/facebookresearch/voxpopuli">VoxPopuli</a></td> <td rowspan="2">CC 0</td> <td>383,500</td> <td>bg, hr, cs, da, nl, en, et, fi, fr, de, el, hu, it, lv, lt, mt, pl, pt, ro, sk, sl, es, sv</td> @@ -141,7 +141,7 @@ <td>✅</td> </tr> <tr> - <td rowspan="2">[YouTube-Commons](https://huggingface.co/datasets/PleIAs/YouTube-Commons)</td> + <td rowspan="2"><a href="https://huggingface.co/datasets/PleIAs/YouTube-Commons">YouTube-Commons</a></td> <td rowspan="2">CC BY 4.0</td> <td>3,261</td> <td>bg, cs, nl, en, et, fr, de, el, hu, it, pl, pt, ro, es</td> @@ -153,7 +153,7 @@ <td>✅</td> </tr> <tr> - <td>[MOSEL :grapes:](https://huggingface.co/datasets/FBK-MT/mosel)</td> + <td><a href="https://huggingface.co/datasets/FBK-MT/mosel">MOSEL :grapes:</a></td> <td>CC BY 4.0</td> <td>441,206</td> <td>bg, hr, cs, da, nl, en, et, fi, fr, de, el, hu, it, lv, lt, mt, pl, pt, ro, sk, sl, es, sv</td>