# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
#  Niccolò Antonello

export get_uttID2file

"""
    get_uttID2file(dataset_path, folder::String) -> Dict{String,String}

Walk `joinpath(dataset_path, folder)` recursively (following symlinks) and map an
utterance ID to the full path of every file whose name ends in `.wav`.

The utterance ID is `"<spkID>-<type>-<sentenceID>"`, where `spkID` is the name of the
directory containing the file, `type` is the name of that directory's parent, and
`sentenceID` is the file name up to (excluding) its first `.`.

# Throws
- `ArgumentError` if a directory containing `.wav` files has fewer than two path
  components, so that `spkID` and `type` cannot both be derived.
"""
function get_uttID2file(dataset_path, folder::String)
    uttID2file = Dict{String,String}()
    for (root, _, files) in walkdir(joinpath(dataset_path, folder); follow_symlinks=true)
        # Match the extension only: a substring test would also pick up names such as
        # "x.wav.bak", which share the sentence ID and could overwrite the real entry.
        wavs = filter(f -> endswith(f, ".wav"), files)
        # Directories without audio (e.g. the walk root) need no speaker/type lookup.
        isempty(wavs) && continue
        # `splitpath` is separator-agnostic, unlike splitting on "/".
        folders = splitpath(root)
        length(folders) >= 2 || throw(ArgumentError(
            "cannot derive speaker and type from directory \"$root\""))
        spkID = folders[end]
        type = folders[end - 1]
        for f in wavs
            sentenceID = first(split(f, "."; limit=2))
            uttID = "$(spkID)-$(type)-$(sentenceID)"
            uttID2file[uttID] = joinpath(root, f)
        end
    end
    return uttID2file
end

export get_uttID2text

"""
    get_uttID2text(uttID2file::AbstractDict) -> Dict{String,String}

Build the transcript of every utterance ID found in `keys(uttID2file)`.

The third `-`-separated field of the ID is read character by character: `'z'` becomes
`"ZERO"`, `'o'` becomes `"OH"`, `'1'`..`'9'` become `"ONE"`..`"NINE"`, and `'a'` / `'b'`
contribute no word. The words are joined with single spaces.

# Throws
- `ErrorException` if an ID has fewer than three `-`-separated fields, or if its third
  field contains a character outside the table above.
"""
function get_uttID2text(uttID2file::AbstractDict)
    char2word = Dict(
        'z' => "ZERO", '3' => "THREE", '7' => "SEVEN",
        'o' => "OH",   '4' => "FOUR",  '8' => "EIGHT",
        '1' => "ONE",  '5' => "FIVE",  '9' => "NINE",
        '2' => "TWO",  '6' => "SIX",   'a' => "", 'b' => "")
    uttID2text = Dict{String,String}()
    for uttID in keys(uttID2file)
        fields = split(uttID, "-")
        length(fields) >= 3 ||
            error("$uttID is an invalid utterance ID, expected <spkID>-<type>-<sentenceID>!")
        text = fields[3]
        # Validate explicitly instead of wrapping the lookup in a catch-all `try`,
        # which would also hide unrelated failures.
        all(c -> haskey(char2word, c), text) ||
            error("$text is an invalid filename, invalid dataset!")
        words = [char2word[c] for c in text]
        # Drop the empty words so that they never leave doubled or trailing spaces.
        uttID2text[uttID] = join(filter(!isempty, words), " ")
    end
    return uttID2text
end

"""
    get_uttID2wav(uttID2file::AbstractDict; T=Float32) -> Dict

Load the file behind every entry of `uttID2file` and return a dictionary from the same
keys to the samples, converted element-wise to `T` and flattened into a `Vector{T}`.

NOTE(review): `load` is not defined in this file; it is assumed to be brought into scope
by the including module and to return an object with a `.data` array field - confirm.
If `.data` is a matrix, its columns are concatenated in column-major order.
"""
function get_uttID2wav(uttID2file::AbstractDict; T=Float32)
    # Concretely typed up front so that an empty input does not give `Dict{Any,Any}`.
    uttID2wav = Dict{keytype(uttID2file),Vector{T}}()
    for (uttID, file) in uttID2file
        # `vec` reshapes the freshly converted array without a second copy.
        uttID2wav[uttID] = vec(T.(load(file).data))
    end
    return uttID2wav
end
+Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +Written by Niccolò Antonello + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..4871acf --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,1159 @@ +# This file is machine-generated - editing it directly is not advised + +[[AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "0.5.0" + +[[AbstractTrees]] +deps = ["Markdown"] +git-tree-sha1 = "33e450545eaf7699da1a6e755f9ea65f14077a45" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.3.3" + +[[Adapt]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ffcfa2d345aaee0ef3d8346a073d5dd03c983ebe" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "3.2.0" + +[[ArgParse]] +deps = ["Logging", "TextWrap"] +git-tree-sha1 = "4a8f4df432fd8e8a96a142c53f9432b9022a92e6" +uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +version = "1.1.1" + +[[Artifacts]] +deps = ["Pkg"] +git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.3.0" + +[[BFloat16s]] +deps = ["LinearAlgebra", "Test"] +git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a" +uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" +version = "0.1.0" + +[[BSON]] +git-tree-sha1 = "dd36d7cf3d185eeaaf64db902c15174b22f5dafb" +uuid = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +version = "0.2.6" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[Blosc]] +deps = ["Blosc_jll"] +git-tree-sha1 = "84cf7d0f8fd46ca6f1b3e0305b4b4a37afe50fd6" +uuid = "a74b3585-a348-5f62-a45c-50e91977d574" +version = "0.7.0" + +[[Blosc_jll]] +deps = ["Libdl", "Lz4_jll", "Pkg", "Zlib_jll", "Zstd_jll"] +git-tree-sha1 = "aa9ef39b54a168c3df1b2911e7797e4feee50fbe" +uuid = "0b7ba130-8d10-5ba8-a3d6-c5182647fed9" +version = "1.14.3+1" + +[[Bzip2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "c3598e525718abcc440f69cc6d5f60dda0a1b61e" +uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" +version = "1.0.6+5" + 
+[[CEnum]] +git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.1" + +[[CUDA]] +deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"] +git-tree-sha1 = "6ccc73b2d8b671f7a65c92b5f08f81422ebb7547" +uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" +version = "2.4.1" + +[[Cairo_jll]] +deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "e2f47f6d8337369411569fd45ae5753ca10394c6" +uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" +version = "1.16.0+6" + +[[ChainRules]] +deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Random", "Reexport", "Requires", "Statistics"] +git-tree-sha1 = "8cb44c68fcc2a6eef1ed603110251a5cd81dd3af" +uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" +version = "0.7.52" + +[[ChainRulesCore]] +deps = ["Compat", "LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "de4f08843c332d355852721adb1592bce7924da3" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "0.9.29" + +[[ClusterManagers]] +deps = ["Distributed", "Logging", "Pkg", "Sockets"] +git-tree-sha1 = "fdcb2d1c35096f74d40164154f4989f82bd8dd38" +repo-rev = "29f6a1fc39cf6182058c15408745585d3cc8dc18" +repo-url = "https://github.com/JuliaParallel/ClusterManagers.jl.git" +uuid = "34f1f09b-3a8b-5176-ab39-66d58a4d544e" +version = "0.4.0" + +[[CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.0" + +[[ColorSchemes]] +deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random", 
"StaticArrays"] +git-tree-sha1 = "3141757b5832ee7a0386db87997ee5a23ff20f4d" +uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" +version = "3.10.2" + +[[ColorTypes]] +deps = ["FixedPointNumbers", "Random"] +git-tree-sha1 = "4bffea7ed1a9f0f3d1a131bbcd4b925548d75288" +uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" +version = "0.10.9" + +[[Colors]] +deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Reexport"] +git-tree-sha1 = "ac5f2213e56ed8a34a3dd2f681f4df1166b34929" +uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" +version = "0.12.6" + +[[CommonSubexpressions]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.0" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.25.0" + +[[CompilerSupportLibraries_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e695f735fca77e9708e795eda62afdb869cbb70" +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.3.4+0" + +[[Contour]] +deps = ["StaticArrays"] +git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7" +uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" +version = "0.5.7" + +[[DSP]] +deps = ["FFTW", "IterTools", "LinearAlgebra", "Polynomials", "Random", "Reexport", "SpecialFunctions", "Statistics"] +git-tree-sha1 = "2a63cb5fc0e8c1f0f139475ef94228c7441dc7d0" +uuid = "717857b8-e6f2-59f4-9121-6e50c889abd2" +version = "0.6.10" + +[[DataAPI]] +git-tree-sha1 = "dfb3b7e89e395be1e25c2ad6d7690dc29cc53b1d" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.6.0" + +[[DataStructures]] +deps = ["Compat", "InteractiveUtils", 
"OrderedCollections"] +git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.9" + +[[DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[DiffResults]] +deps = ["StaticArrays"] +git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.0.3" + +[[DiffRules]] +deps = ["NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "214c3fcac57755cfda163d91c58893a8723f93e9" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.0.2" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[EarCut_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "92d8f9f208637e8d2d28c664051a00569c01493d" +uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" +version = "2.1.5+1" + +[[Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "1402e52fcda25064f51c77a9655ce8680b76acf0" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.2.7+6" + +[[ExprTools]] +git-tree-sha1 = "10407a39b87f29d47ebaca8edbc75d7c302ff93e" +uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" +version = "0.1.3" + +[[EzXML]] +deps = ["Printf", "XML2_jll"] +git-tree-sha1 = "0fa3b52a04a4e210aeb1626def9c90df3ae65268" +uuid = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" +version = "1.1.0" + +[[FFMPEG]] +deps = ["FFMPEG_jll", "x264_jll"] +git-tree-sha1 = "9a73ffdc375be61b0e4516d83d880b265366fe1f" +uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" +version = "0.4.0" + +[[FFMPEG_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "LibVPX_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", 
"Opus_jll", "Pkg", "Zlib_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] +git-tree-sha1 = "3cc57ad0a213808473eafef4845a74766242e05f" +uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" +version = "4.3.1+4" + +[[FFTW]] +deps = ["AbstractFFTs", "FFTW_jll", "IntelOpenMP_jll", "Libdl", "LinearAlgebra", "MKL_jll", "Reexport"] +git-tree-sha1 = "8fda0934cb99db617171f7296dc361f4d6fa5424" +uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +version = "1.3.0" + +[[FFTW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "5a0d4b6a22a34d17d53543bd124f4b08ed78e8b0" +uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" +version = "3.3.9+7" + +[[FLAC_jll]] +deps = ["Libdl", "Ogg_jll", "Pkg"] +git-tree-sha1 = "cd528d5083a589aab69dc6de73a901efa9d69586" +uuid = "1d38b3a6-207b-531b-80e8-c83f48dafa73" +version = "1.3.3+2" + +[[FileIO]] +deps = ["Pkg"] +git-tree-sha1 = "fee8955b9dfa7bec67117ef48085fb2b559b9c22" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.4.5" + +[[FillArrays]] +deps = ["LinearAlgebra", "Random", "SparseArrays"] +git-tree-sha1 = "bed538ad14d132aa8240bb2e8ab82fcd2fd2f548" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "0.11.3" + +[[FiniteStateTransducers]] +deps = ["DataStructures", "Random"] +git-tree-sha1 = "42d75e0b4f7cbdc29911175ddaa61f14293c6f19" +uuid = "d0430b04-9e26-4b95-8372-754e5cd95c01" +version = "0.1.0" + +[[FixedPointNumbers]] +git-tree-sha1 = "d14a6fa5890ea3a7e5dcab6811114f132fec2b4b" +uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" +version = "0.6.1" + +[[Flux]] +deps = ["AbstractTrees", "Adapt", "CUDA", "CodecZlib", "Colors", "DelimitedFiles", "Functors", "Juno", "LinearAlgebra", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "SHA", "Statistics", "StatsBase", "Test", "ZipFile", "Zygote"] +git-tree-sha1 = "c443bf5a8329573a68364106b2c29bb6938dc6f5" +uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c" +version = "0.11.6" + +[[Fontconfig_jll]] +deps = ["Artifacts", "Bzip2_jll", 
"Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "35895cf184ceaab11fd778b4590144034a167a2f" +uuid = "a3f928ae-7b40-5064-980b-68af3947d34b" +version = "2.13.1+14" + +[[Formatting]] +deps = ["Printf"] +git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" +uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" +version = "0.4.2" + +[[ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "d48a40c0f54f29a5c8748cfb3225719accc72b77" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.16" + +[[FreeType2_jll]] +deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "cbd58c9deb1d304f5a245a0b7eb841a2560cfec6" +uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" +version = "2.10.1+5" + +[[FriBidi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0d20aed5b14dd4c9a2453c1b601d08e1149679cc" +uuid = "559328eb-81f9-559d-9380-de523a88c83c" +version = "1.0.5+6" + +[[Functors]] +deps = ["MacroTools"] +git-tree-sha1 = "f40adc6422f548176bb4351ebd29e4abf773040a" +uuid = "d9f16b24-f501-4c13-a1f2-28368ffc5196" +version = "0.1.0" + +[[GLFW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll"] +git-tree-sha1 = "a1bbf700b5388bffc3d882f4f4d625cf1c714fd7" +uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" +version = "3.3.2+1" + +[[GPUArrays]] +deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization"] +git-tree-sha1 = "f99a25fe0313121f2f9627002734c7d63b4dd3bd" +uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +version = "6.2.0" + +[[GPUCompiler]] +deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "c853c810b52a80f9aad79ab109207889e57f41ef" +uuid = 
"61eb1bfa-7361-4325-ad38-22787b887f55" +version = "0.8.3" + +[[GR]] +deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "LinearAlgebra", "Pkg", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"] +git-tree-sha1 = "aaebdf5588281c2902f499b49e67953f2b409c9c" +uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" +version = "0.54.0" + +[[GR_jll]] +deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Pkg", "Qt_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "8aee6fa096b0cbdb05e71750c978b96a08c78951" +uuid = "d2c73de3-f751-5644-a686-071e5b155ba9" +version = "0.53.0+0" + +[[GeometryBasics]] +deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] +git-tree-sha1 = "4d4f72691933d5b6ee1ff20e27a102c3ae99d123" +uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +version = "0.3.9" + +[[Gettext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "8c14294a079216000a0bdca5ec5a447f073ddc9d" +uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" +version = "0.20.1+7" + +[[Glib_jll]] +deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "04690cc5008b38ecbdfede949220bc7d9ba26397" +uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" +version = "2.59.0+4" + +[[Grisu]] +git-tree-sha1 = "03d381f65183cb2d0af8b3425fde97263ce9a995" +uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" +version = "1.0.0" + +[[HDF5]] +deps = ["Blosc", "HDF5_jll", "Libdl", "Mmap", "Random"] +git-tree-sha1 = "0713cbabdf855852dfab3ce6447c87145f3d9ea8" +uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +version = "0.13.6" + +[[HDF5_jll]] +deps = ["Artifacts", "JLLWrappers", "LibCURL_jll", "Libdl", "OpenSSL_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "fd83fa0bde42e01952757f01149dd968c06c4dba" +uuid = 
"0234f1f7-429e-5d53-9886-15a909be8d59" +version = "1.12.0+1" + +[[HMMGradients]] +deps = ["ChainRulesCore", "LinearAlgebra", "Random", "SparseArrays"] +git-tree-sha1 = "f8a736033242c0170ce8a808118b6c5097c9a374" +uuid = "ed22c0d8-4b10-4781-a02d-2b7b373fe96c" +version = "0.1.2" + +[[HTTP]] +deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets", "URIs"] +git-tree-sha1 = "942c1a9c750bbe79912b7bd060a420932afd35b8" +uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" +version = "0.9.3" + +[[IRTools]] +deps = ["InteractiveUtils", "MacroTools", "Test"] +git-tree-sha1 = "c67e7515a11f726f44083e74f218d134396d6510" +uuid = "7869d1d1-7146-5819-86e3-90919afe41df" +version = "0.4.2" + +[[IniFile]] +deps = ["Test"] +git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" +uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" +version = "0.5.0" + +[[IntelOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "d979e54b71da82f3a65b62553da4fc3d18c9004c" +uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" +version = "2018.0.3+2" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[IntervalSets]] +deps = ["Dates", "Statistics"] +git-tree-sha1 = "4214b48a62eb8f2c292b2ee34a508c256c0cdbc9" +uuid = "8197267c-284f-5f27-9208-e0e47529a953" +version = "0.3.2" + +[[Intervals]] +deps = ["Dates", "Printf", "RecipesBase", "Serialization", "TimeZones"] +git-tree-sha1 = "323a38ed1952d30586d0fe03412cde9399d3618b" +uuid = "d8418881-c3e1-53bb-8760-2df7ec849ed5" +version = "1.5.0" + +[[IterTools]] +git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18" +uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +version = "1.3.0" + +[[IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[JLD2]] +deps = ["CodecZlib", "DataStructures", "MacroTools", "Mmap", "Pkg", "Printf", "Requires", "UUIDs"] +git-tree-sha1 = 
"bb9a457481adf060ab5898823a49d4f854ff4ddd" +uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +version = "0.4.0" + +[[JLLWrappers]] +git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.2.0" + +[[JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.1" + +[[JpegTurbo_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9aff0587d9603ea0de2c6f6300d9f9492bbefbd3" +uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" +version = "2.0.1+3" + +[[Juno]] +deps = ["Base64", "Logging", "Media", "Profile"] +git-tree-sha1 = "07cb43290a840908a771552911a6274bc6c072c7" +uuid = "e5e0dc1b-0480-54bc-9374-aad01c23163d" +version = "0.8.4" + +[[LAME_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "df381151e871f41ee86cee4f5f6fd598b8a68826" +uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" +version = "3.100.0+3" + +[[LLVM]] +deps = ["CEnum", "Libdl", "Printf", "Unicode"] +git-tree-sha1 = "b616937c31337576360cb9fb872ec7633af7b194" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "3.6.0" + +[[LZO_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f128cd6cd05ffd6d3df0523ed99b90ff6f9b349a" +uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" +version = "2.10.0+3" + +[[LaTeXStrings]] +git-tree-sha1 = "c7aebfecb1a60d59c0fe023a68ec947a208b1e6b" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.2.0" + +[[Latexify]] +deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "Printf", "Requires"] +git-tree-sha1 = "3a0084cec7bf157edcb45a67fac0647f88fe5eaf" +uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" +version = "0.14.7" + +[[LibCURL_jll]] +deps = ["LibSSH2_jll", "Libdl", "MbedTLS_jll", "Pkg", "Zlib_jll", "nghttp2_jll"] +git-tree-sha1 = "897d962c20031e6012bba7b3dcb7a667170dad17" +uuid = 
"deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.70.0+2" + +[[LibGit2]] +deps = ["Printf"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[LibSSH2_jll]] +deps = ["Libdl", "MbedTLS_jll", "Pkg"] +git-tree-sha1 = "717705533148132e5466f2924b9a3657b16158e8" +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.9.0+3" + +[[LibSndFile]] +deps = ["FileIO", "Libdl", "LinearAlgebra", "Printf", "SampledSignals", "libsndfile_jll"] +git-tree-sha1 = "79b36b1457d209d7705e07d6e9bca9bb2524b520" +uuid = "b13ce0c6-77b0-50c6-a2db-140568b8d1a5" +version = "2.3.0" + +[[LibVPX_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "85fcc80c3052be96619affa2fe2e6d2da3908e11" +uuid = "dd192d2f-8180-539f-9fb4-cc70b1dcf69a" +version = "1.9.0+1" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[Libffi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "a2cd088a88c0d37eef7d209fd3d8712febce0d90" +uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" +version = "3.2.1+4" + +[[Libgcrypt_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"] +git-tree-sha1 = "b391a18ab1170a2e568f9fb8d83bc7c780cb9999" +uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" +version = "1.8.5+4" + +[[Libglvnd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"] +git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf" +uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29" +version = "1.3.0+3" + +[[Libgpg_error_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ec7f2e8ad5c9fa99fc773376cdbc86d9a5a23cb7" +uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" +version = "1.36.0+3" + +[[Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e924324b2e9275a51407a4e06deb3455b1e359f" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.16.0+7" + +[[Libmount_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = 
"51ad0c01c94c1ce48d5cad629425035ad030bfd5" +uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" +version = "2.34.0+3" + +[[Libtiff_jll]] +deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Pkg", "Zlib_jll", "Zstd_jll"] +git-tree-sha1 = "291dd857901f94d683973cdf679984cdf73b56d0" +uuid = "89763e89-9b03-5906-acba-b20f662cd828" +version = "4.1.0+2" + +[[Libuuid_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f879ae9edbaa2c74c922e8b85bb83cc84ea1450b" +uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" +version = "2.34.0+7" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[Lz4_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "51b1db0732bbdcfabb60e36095cc3ed9c0016932" +uuid = "5ced341a-0733-55b8-9ab6-a4889d929147" +version = "1.9.2+2" + +[[MFCC]] +deps = ["DSP", "Distributed", "FileIO", "HDF5", "SpecialFunctions", "Statistics", "WAV"] +git-tree-sha1 = "e8d6bb66e00f85ea7ba7f244da3b097d80825b3b" +uuid = "ca7b5df7-6146-5dcc-89ec-36256279a339" +version = "0.3.1" + +[[MKL_jll]] +deps = ["IntelOpenMP_jll", "Libdl", "Pkg"] +git-tree-sha1 = "eb540ede3aabb8284cb482aa41d00d6ca850b1f8" +uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" +version = "2020.2.254+0" + +[[MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.6" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[MbedTLS]] +deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"] +git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe" +uuid = "739be429-bea8-5141-9913-cc70e7f3736d" +version = "1.0.3" + +[[MbedTLS_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0eef589dd1c26a3ac9d753fe1a8bcad63f956fa6" +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.16.8+1" + +[[Measures]] +git-tree-sha1 = 
"e498ddeee6f9fdb4551ce855a46f54dbd900245f" +uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" +version = "0.3.1" + +[[Media]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "75a54abd10709c01f1b86b84ec225d26e840ed58" +uuid = "e89f7d12-3494-54d1-8411-f7d8b9ae1f27" +version = "0.5.0" + +[[Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "f8c673ccc215eb50fcadb285f522420e29e69e1c" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "0.4.5" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[Mocking]] +deps = ["ExprTools"] +git-tree-sha1 = "916b850daad0d46b8c71f65f719c49957e9513ed" +uuid = "78c3b35d-d492-501b-9361-3d52fe80e533" +version = "0.7.1" + +[[NNlib]] +deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Pkg", "Requires", "Statistics"] +git-tree-sha1 = "df42d0816edfc24f5b82a728f46381613c4dff79" +uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +version = "0.7.14" + +[[NaNMath]] +git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "0.3.5" + +[[OffsetArrays]] +deps = ["Adapt"] +git-tree-sha1 = "76622f08645764e040b4d7e86d0ff471fd126ae4" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.5.3" + +[[Ogg_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "a42c0f138b9ebe8b58eba2271c5053773bde52d0" +uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" +version = "1.3.4+2" + +[[OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "71bbbc616a1d710879f5a1021bcba65ffba6ce58" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "1.1.1+6" + +[[OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9db77584158d0ab52307f8c04f8e7c08ca76b5b3" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.3+4" + +[[Opus_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f9d57f4126c39565e05a2b0264df99f497fc6f37" +uuid = 
"91d4177d-7536-5919-b921-800302f37372" +version = "1.3.1+3" + +[[OrderedCollections]] +git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.4.0" + +[[PCRE_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "1b556ad51dceefdbf30e86ffa8f528b73c7df2bb" +uuid = "2f80f16e-611a-54ab-bc61-aa92de5b98fc" +version = "8.42.0+4" + +[[Parsers]] +deps = ["Dates"] +git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "1.0.15" + +[[Pixman_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6a20a83c1ae86416f0a5de605eaea08a552844a3" +uuid = "30392449-352a-5448-841d-b1acce4e97dc" +version = "0.40.0+0" + +[[Pkg]] +deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[PlotThemes]] +deps = ["PlotUtils", "Requires", "Statistics"] +git-tree-sha1 = "a3a964ce9dc7898193536002a6dd892b1b5a6f1d" +uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" +version = "2.0.1" + +[[PlotUtils]] +deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"] +git-tree-sha1 = "ae9a295ac761f64d8c2ec7f9f24d21eb4ffba34d" +uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" +version = "1.0.10" + +[[Plots]] +deps = ["Base64", "Contour", "Dates", "FFMPEG", "FixedPointNumbers", "GR", "GeometryBasics", "JSON", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"] +git-tree-sha1 = "7ecf7d0207e7208a5cad9fd3bd357f5d5eb16044" +uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +version = "1.10.5" + +[[Polynomials]] +deps = ["Intervals", "LinearAlgebra", "OffsetArrays", "RecipesBase"] +git-tree-sha1 = "1c6c5b0c3713738d6b987903c529d80622c37e07" +uuid = 
"f27b6e38-b328-58d1-80ce-0feddd5e7a45" +version = "1.2.0" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[Profile]] +deps = ["Printf"] +uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" + +[[Qt_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "xkbcommon_jll"] +git-tree-sha1 = "7760cfea90bec61814e31dfb204fa4b81bba7b57" +uuid = "ede63266-ebff-546c-83e0-1c6fb6d0efc8" +version = "5.15.2+1" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[RecipesBase]] +git-tree-sha1 = "b3fb709f3c97bfc6e948be68beeecb55a0b340ae" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "1.1.1" + +[[RecipesPipeline]] +deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"] +git-tree-sha1 = "c4d54a78e287de7ec73bbc928ce5eb3c60f80b24" +uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" +version = "0.3.1" + +[[Reexport]] +git-tree-sha1 = "57d8440b0c7d98fc4f889e478e80f268d534c9d5" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.0.0" + +[[Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "cfbac6c1ed70c002ec6361e7fd334f02820d6419" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.1.2" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[SampledSignals]] +deps = ["Compat", "DSP", "FFTW", "FixedPointNumbers", "IntervalSets", "LinearAlgebra", "TreeViews", "Unitful"] +git-tree-sha1 = "7f95f0a2ed381addc60101ed91d56f6b2c3f108b" +repo-rev = "6a9c6a58fe2a17baa8f5f074acd2b58aed0e3544" +repo-url = "https://github.com/JuliaAudio/SampledSignals.jl.git" +uuid = "bd7594eb-a658-542f-9e75-4c4d8908c167" +version = "2.1.0" + +[[Scratch]] +deps 
= ["Dates"] +git-tree-sha1 = "ad4b278adb62d185bbcb6864dc24959ab0627bf6" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.0.3" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Showoff]] +deps = ["Dates", "Grisu"] +git-tree-sha1 = "ee010d8f103468309b8afac4abb9be2e18ff1182" +uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" +version = "0.3.2" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SortingAlgorithms]] +deps = ["DataStructures", "Random", "Test"] +git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "0.3.1" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[SpecialFunctions]] +deps = ["OpenSpecFun_jll"] +git-tree-sha1 = "d8d8b8a9f4119829410ecd706da4cc8594a1e020" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "0.10.3" + +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "9da72ed50e94dbff92036da395275ed114e04d49" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.0.1" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] +git-tree-sha1 = "400aa43f7de43aeccc5b2e39a76a79d262202b76" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.33.3" + +[[StructArrays]] +deps = ["Adapt", "DataAPI", "Tables"] +git-tree-sha1 = "26ea43b4be7e919a2390c3c0f824e7eb4fc19a0a" +uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" +version = "0.5.0" + +[[TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.0" + 
+[[Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] +git-tree-sha1 = "a716dde43d57fa537a19058d044b495301ba6565" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.3.2" + +[[Test]] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[TextWrap]] +git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf" +uuid = "b718987f-49a8-5099-9789-dcd902bef87d" +version = "1.0.1" + +[[TimeZones]] +deps = ["Dates", "EzXML", "Mocking", "Pkg", "Printf", "RecipesBase", "Serialization", "Unicode"] +git-tree-sha1 = "4ba8a9579a243400db412b50300cd61d7447e583" +uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53" +version = "1.5.3" + +[[TimerOutputs]] +deps = ["Printf"] +git-tree-sha1 = "3318281dd4121ecf9713ce1383b9ace7d7476fdd" +uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +version = "0.5.7" + +[[TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.5" + +[[TreeViews]] +deps = ["Test"] +git-tree-sha1 = "8d0d7a3fe2f30d6a7f833a5f19f7c7a5b396eae6" +uuid = "a2a6695c-b41b-5b7d-aed9-dbfdeacea5d7" +version = "0.3.0" + +[[URIs]] +git-tree-sha1 = "7855809b88d7b16e9b029afd17880930626f54a2" +uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" +version = "1.2.0" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[Unitful]] +deps = ["LinearAlgebra", "Random"] +git-tree-sha1 = "92bdf0ccfa9612b167d0adaadef832a09971ceb0" +uuid = "1986cc42-f94f-5a68-af5c-568840ba703d" +version = "0.17.0" + +[[WAV]] +deps = ["Base64", "FileIO", "Libdl", "Logging"] +git-tree-sha1 = "21b46ff53c571693f617c151a0497ea7749bdeba" +uuid = "8149f6b0-98f6-5db9-b78f-408fbbb8ef88" +version = "1.1.0" + +[[Wayland_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", 
"Libffi_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "dc643a9b774da1c2781413fd7b6dcd2c56bb8056" +uuid = "a2964d1f-97da-50d4-b82a-358c7fce9d89" +version = "1.17.0+4" + +[[Wayland_protocols_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll"] +git-tree-sha1 = "2839f1c1296940218e35df0bbb220f2a79686670" +uuid = "2381bf8a-dfd0-557d-9999-79630e7b1b91" +version = "1.18.0+4" + +[[XML2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "be0db24f70aae7e2b89f2f3092e93b8606d659a6" +uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" +version = "2.9.10+3" + +[[XSLT_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "2b3eac39df218762d2d005702d601cd44c997497" +uuid = "aed1982a-8fda-507f-9586-7b0439959a61" +version = "1.1.33+4" + +[[Xorg_libX11_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] +git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527" +uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" +version = "1.6.9+4" + +[[Xorg_libXau_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e" +uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" +version = "1.0.9+4" + +[[Xorg_libXcursor_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXfixes_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "12e0eb3bc634fa2080c1c37fccf56f7c22989afd" +uuid = "935fb764-8cf2-53bf-bb30-45bb1f8bf724" +version = "1.2.0+4" + +[[Xorg_libXdmcp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4" +uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" +version = "1.1.3+4" + +[[Xorg_libXext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3" +uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" +version = "1.3.4+4" + +[[Xorg_libXfixes_jll]] 
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "0e0dc7431e7a0587559f9294aeec269471c991a4" +uuid = "d091e8ba-531a-589c-9de9-94069b037ed8" +version = "5.0.3+4" + +[[Xorg_libXi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXfixes_jll"] +git-tree-sha1 = "89b52bc2160aadc84d707093930ef0bffa641246" +uuid = "a51aa0fd-4e3c-5386-b890-e753decda492" +version = "1.7.10+4" + +[[Xorg_libXinerama_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll"] +git-tree-sha1 = "26be8b1c342929259317d8b9f7b53bf2bb73b123" +uuid = "d1454406-59df-5ea1-beac-c340f2130bc3" +version = "1.1.4+4" + +[[Xorg_libXrandr_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "34cea83cb726fb58f325887bf0612c6b3fb17631" +uuid = "ec84b674-ba8e-5d96-8ba1-2a689ba10484" +version = "1.5.2+4" + +[[Xorg_libXrender_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "19560f30fd49f4d4efbe7002a1037f8c43d43b96" +uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" +version = "0.9.10+4" + +[[Xorg_libpthread_stubs_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb" +uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" +version = "0.1.0+3" + +[[Xorg_libxcb_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] +git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6" +uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" +version = "1.13.0+3" + +[[Xorg_libxkbfile_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "926af861744212db0eb001d9e40b5d16292080b2" +uuid = "cc61e674-0454-545c-8b26-ed2c68acab7a" +version = "1.1.0+4" + +[[Xorg_xcb_util_image_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 
= "0fab0a40349ba1cba2c1da699243396ff8e94b97" +uuid = "12413925-8142-5f55-bb0e-6d7ca50bb09b" +version = "0.4.0+1" + +[[Xorg_xcb_util_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll"] +git-tree-sha1 = "e7fd7b2881fa2eaa72717420894d3938177862d1" +uuid = "2def613f-5ad1-5310-b15b-b15d46f528f5" +version = "0.4.0+1" + +[[Xorg_xcb_util_keysyms_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "d1151e2c45a544f32441a567d1690e701ec89b00" +uuid = "975044d2-76e6-5fbe-bf08-97ce7c6574c7" +version = "0.4.0+1" + +[[Xorg_xcb_util_renderutil_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "dfd7a8f38d4613b6a575253b3174dd991ca6183e" +uuid = "0d47668e-0667-5a69-a72c-f761630bfb7e" +version = "0.3.9+1" + +[[Xorg_xcb_util_wm_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "e78d10aab01a4a154142c5006ed44fd9e8e31b67" +uuid = "c22f9ab0-d5fe-5066-847c-f4bb1cd4e361" +version = "0.4.1+1" + +[[Xorg_xkbcomp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxkbfile_jll"] +git-tree-sha1 = "4bcbf660f6c2e714f87e960a171b119d06ee163b" +uuid = "35661453-b289-5fab-8a00-3d9160c6a3a4" +version = "1.4.2+4" + +[[Xorg_xkeyboard_config_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xkbcomp_jll"] +git-tree-sha1 = "5c8424f8a67c3f2209646d4425f3d415fee5931d" +uuid = "33bec58e-1273-512f-9401-5d533626f822" +version = "2.27.0+4" + +[[Xorg_xtrans_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845" +uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" +version = "1.4.0+3" + +[[ZipFile]] +deps = ["Libdl", "Printf", "Zlib_jll"] +git-tree-sha1 = "c3a5637e27e914a7a445b8d0ad063d701931e9f7" +uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" +version = "0.9.3" + +[[Zlib_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = 
"320228915c8debb12cb434c59057290f0834dbf6" +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.11+18" + +[[Zstd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "2c1332c54931e83f8f94d310fa447fd743e8d600" +uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" +version = "1.4.8+0" + +[[Zygote]] +deps = ["AbstractFFTs", "ChainRules", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"] +git-tree-sha1 = "52835a83f7c899cfcb95f796d584201812887ea8" +uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" +version = "0.6.3" + +[[ZygoteRules]] +deps = ["MacroTools"] +git-tree-sha1 = "9e7a1e8ca60b742e508a315c17eef5211e7fbfd7" +uuid = "700de1a5-db45-46bc-99cf-38207098b444" +version = "0.2.1" + +[[alsa_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ce395137bbc11d37ab57da33062c42de6b99d76e" +uuid = "45378030-f8ea-5b20-a7c7-1a9d95efb90e" +version = "1.2.1-1+2" + +[[libass_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "acc685bcf777b2202a904cdcb49ad34c2fa1880c" +uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" +version = "0.14.0+4" + +[[libfdk_aac_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "7a5780a0d9c6864184b3a2eeeb833a0c871f00ab" +uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" +version = "0.1.6+4" + +[[libpng_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "6abbc424248097d69c0c87ba50fcb0753f93e0ee" +uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f" +version = "1.6.37+6" + +[[libsndfile_jll]] +deps = ["Artifacts", "FLAC_jll", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg", "alsa_jll", "libvorbis_jll"] +git-tree-sha1 = "fc7a0233235d1c4da6dfdf73f480c2619e9ea66d" +uuid = "5bf562c0-5a39-5b4f-b979-f64ac885830c" +version = "1.0.28+2" + 
+[[libvorbis_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"] +git-tree-sha1 = "fa14ac25af7a4b8a7f61b287a124df7aab601bcd" +uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" +version = "1.3.6+6" + +[[nghttp2_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "8e2c44ab4d49ad9518f359ed8b62f83ba8beede4" +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.40.0+2" + +[[x264_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "d713c1ce4deac133e3334ee12f4adff07f81778f" +uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" +version = "2020.7.14+2" + +[[x265_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "487da2f8f2f0c8ee0e83f39d13037d6bbf0a45ab" +uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" +version = "3.0.0+3" + +[[xkbcommon_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll", "Wayland_protocols_jll", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"] +git-tree-sha1 = "ece2350174195bb31de1a63bea3a41ae1aa593b6" +uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd" +version = "0.9.1+5" diff --git a/Models.jl b/Models.jl new file mode 100644 index 0000000..ee24b75 --- /dev/null +++ b/Models.jl @@ -0,0 +1,67 @@ +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Niccolò Antonello + +using Flux + +struct FullyConnected{T<:AbstractFloat} + M::Matrix{T} + b::Matrix{T} +end + +init_bias(Ny,Nx) = +2/sqrt(Float32(first(Flux.nfan(Ny,Nx)))) * Float32.(rand(Ny) .- 0.5) + +function FullyConnected(Nx::Int,Ny::Int) + M = Flux.kaiming_uniform(Ny,Nx)'[:,:] + b = reshape(init_bias(Ny,Nx),1,Ny) + return FullyConnected(M,b) +end + +# TODO: in future Flux versions this can be replaced by Dense +function (model::FullyConnected{T})(X::AbstractArray{T,3}) where {T} + Nt, Nx, Nb = size(X) + Ny = size(model.M,2) + + X = permutedims(X,(1,3,2)) + X = reshape(X,Nt*Nb,Nx) + + Y = X*model.M .+ model.b + + Y = reshape(Y,Nt,Nb,Ny) + Y = permutedims(Y,(1,3,2)) + return Y +end +Flux.@functor FullyConnected + 
+export get_convnet +function get_convnet(Nf,Ns; + Nhs=128*ones(Int,2), + Nks=[3,3], + strides=[1,3], + dilations=[1,2], + dropout=[0.0,0.0], + fout = x->logsoftmax(x,dims=2) + ) + T = Float32 + Nl = length(Nhs) + @assert length(Nhs) == length(Nks) == length(strides) == length(dilations) + convs = [Conv((Nks[i],), (i == 1 ? Nf : Nhs[i-1]) => Nhs[i], + stride=strides[i], + dilation=dilations[i], + pad=SamePad(), + init = Flux.kaiming_uniform, + bias = init_bias(Nhs[i], i==1 ? Nf : Nhs[i-1]) + ) for i=1:Nl] + bns = [BatchNorm(Nhs[i],relu) for i=1:Nl] + dro =[Dropout(dropout[i],dims=2) for i=1:Nl] + out = FullyConnected(Nhs[end],Ns) + layers = [] + for i=1:Nl + push!(layers,convs[i]) + push!(layers,bns[i]) + push!(layers,dro[i]) + end + push!(layers,out) + push!(layers,fout) + return Chain(layers...) +end diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..fc29eee --- /dev/null +++ b/Project.toml @@ -0,0 +1,15 @@ +[deps] +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +ClusterManagers = "34f1f09b-3a8b-5176-ab39-66d58a4d544e" +DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2" +FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +FiniteStateTransducers = "d0430b04-9e26-4b95-8372-754e5cd95c01" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +HMMGradients = "ed22c0d8-4b10-4781-a02d-2b7b373fe96c" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LibSndFile = "b13ce0c6-77b0-50c6-a2db-140568b8d1a5" +MFCC = "ca7b5df7-6146-5dcc-89ec-36256279a339" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +SampledSignals = "bd7594eb-a658-542f-9e75-4c4d8908c167" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/README.md b/README.md new file mode 100644 index 0000000..9b80205 --- /dev/null +++ b/README.md @@ -0,0 +1,57 @@ +# TIDIGITS recipe + +This repository contains a _recipe_ for training an ASR system using the [TIDIGITS database](https://catalog.ldc.upenn.edu/LDC93S10). 
+The recipe is entirely Julia-flavoured and uses the following packages (among others): +* [Flux](https://github.com/FluxML/Flux.jl) as ML library +* [FiniteStateTransducers](https://github.com/idiap/FiniteStateTransducers.jl) for WFST compositions +* [HMMGradients](https://github.com/idiap/HMMGradients.jl) for maximum likelihood training + +Currently the training runs only on CPU and employs a simple greedy decoder. Stay tuned for more! + +### Installation + +Set in your environment the path `TIDIGITS_PATH=\your\path\to\tidigits`. +If you're using SGE, set the command flags in `CPU_CMD`, i.e. the queue options. + +This can be done e.g. by running `source env.sh` before launching Julia, where `env.sh` is a script that exports these variables. +Alternatively, the environment variables can be specified [directly in the REPL](https://docs.julialang.org/en/v1/manual/environment-variables/). + +Run `julia --project -e 'using Pkg; Pkg.instantiate()'` to install all the dependencies. + +### Configuration + +Specify your current configuration in the folder `conf`. +The configuration files are loaded from the folder `conf/mysetup/`. +This folder must contain the following files: +* `feat_conf.jl` for feature extraction +* `model_conf.jl` for model and optimisation parameters (hyperparameters) +A couple of setups are present in this repository for reference in the folder `conf`. + +### Data preparation + +Run `julia --project prepare_data.jl --conf 2a` to extract features and prepare training data using the configuration `2a`. +Features and transcriptions will be saved in the folder `data/uuid/`. +Here `uuid` is derived from the contents of the `feat_conf.jl` file, meaning that if you create a new `model_conf.jl` without modifying feature extraction you don't need to run data preparation twice. +If an SGE grid is available, add the flag `--nj N` to split the work into `N` jobs. + +For the moment the HMM configuration is fixed in `WFSTs.jl` with a phone-based 2-state HMM. 
+ +### Training + +Training is performed by running the script `julia --project train.jl --conf 2a`. +Notice that if you're just experimenting it is more convenient to run the experiment from Julia's REPL. +```julia +$ julia --project + +julia> include("train.jl") + +``` +Modify the `conf` by changing the default in the `ArgParse` table. + +### Evaluation + +Run the script `eval.jl` to calculate the Word Error Rate (WER) and Phone Error Rate (PER). + +### Demo + +A live demo can be run with `demo.jl` (requires [sox](http://sox.sourceforge.net/) to be installed on your system). diff --git a/RESULTS.md b/RESULTS.md new file mode 100644 index 0000000..86ac62a --- /dev/null +++ b/RESULTS.md @@ -0,0 +1,19 @@ +# Setup 1a (4 layer CNN with l1 regularisation) +* Phone Error Rate (PER): 0.899 % +* Word Error Rate (WER) : 1.217 % +* Accuracy: 0.965 + +# Setup 1b (4 layer CNN with softmax output) +* Phone Error Rate (PER): 1.477 % +* Word Error Rate (WER) : 2.074 % +* Accuracy: 0.943 + +# Setup 2a (8 layer CNN with l1 regularisation) +* Phone Error Rate (PER): 0.587 % +* Word Error Rate (WER) : 0.827 % +* Accuracy: 0.975 + +# Setup 2b (8 layer CNN with softmax output) +* Phone Error Rate (PER): 5.204 % +* Word Error Rate (WER) : 10.238 % +* Accuracy: 0.718 diff --git a/Utils.jl b/Utils.jl new file mode 100644 index 0000000..7d96fa3 --- /dev/null +++ b/Utils.jl @@ -0,0 +1,165 @@ +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Niccolò Antonello + +using UUIDs + +export zeropad +function zeropad(x::Vector{Matrix{T}}) where {T} + Nt_max = maximum(size.(x,1)) + Nf = size(x[1],2) + Nb = length(x) + x_batched = zeros(T,Nt_max,Nf,Nb) + for (i,xi) in enumerate(x) + for f = 1:Nf, t = 1:size(xi,1) + x_batched[t,f,i] = xi[t,f] + end + end + return x_batched +end + +export posterior2phones +function posterior2phones(ippsym, gamma) + #z = [replace(ippsym[argmax(gamma[t,:])], r"[0-9\-]"=>"") for t in 1:size(gamma,1)] + p = [ippsym[argmax(gamma[t,:])] for t in 
1:size(gamma,1)] # all phones symbols + k = [p[1]] # phones without repetitions + for t in 2:length(p) + if p[t-1] != p[t] + push!(k,p[t]) + end + end + k2 = String[] + for ki in k + if ki[end] == '1' # emitting symbol + push!(k2,replace(ki, r"[0-9\-]"=>"")) + end + end + return k2 +end + +export levenshtein +function levenshtein(s,t) + n,m = length(s),length(t) + D = zeros(Int,n+1,m+1) + + D[:,1] = 0:n # this is the cost we would have for insertion only + D[1,:] = 0:m # this is the cost we would have for deletion only + for i = 2:n+1, j = 2:m+1 + # check substition is needed + cost = s[i-1] == t[j-1] ? 0 : 1 + + D[i,j] = min( + D[i-1,j] + 1, # del + D[i,j-1] + 1, # ins + D[i-1,j-1] + cost, # subs / ok + ) + end + return D[n+1,m+1] +end + +export text2phones +function text2phones(lexicon,text; add_sil=true) + if add_sil + phones = [[lexicon[t]...,""] for t in split(text)] + else + phones = [lexicon[t] for t in split(text)] + end + phones = vcat(phones...) + if add_sil + phones = ["",phones...] + end + return phones +end + +export get_error_rate +function get_error_rate(uttID2seq::Dict, + uttID2seq_dec::Dict; kwargs...) + seqs, seq_decs = [], [] + for uttID in keys(uttID2seq) + push!(seqs,uttID2seq[uttID]) + push!(seq_decs,uttID2seq_dec[uttID]) + end + get_error_rate(seqs,seq_decs; kwargs...) 
+end + +function get_error_rate(seqs::Vector,seq_decs::Vector; is_split=false) + Nw = 0 + err = 0 + for i in eachindex(seqs) + seq, seq_dec = seqs[i], seq_decs[i] + if is_split == false + seq = split(seq;keepempty=false) + seq_dec = split(seq_dec; keepempty=false) + end + Nw += length(seq) + err += levenshtein(seq,seq_dec) + end + er = err/Nw +end + +export min_dist_word +function min_dist_word(prons,min_dist,word_phones) + d = [levenshtein(word_phones,pr) for pr in prons] + idxs = findall(d .<= min_dist) + if isempty(idxs) + return "" + else + return ilexicon[prons[idxs[argmin(d[idxs])]]] + end +end + +function add_word!(dec,prons,word_phones; min_dist=2) + t = try + ilexicon[word_phones] + catch + if min_dist == 0 + "" + else + min_dist_word(prons,min_dist,word_phones) + end + end + push!(dec,t) +end + +function phones2words_greedy(ilexicon,phones; min_dist=2) + prons = [keys(ilexicon)...] + word_phones = String[] + dec = String[] + for (i,p) in enumerate(phones) + if i == 1 + word_phones = String[] + if p != "" + push!(word_phones,p) + end + elseif (p == "") && i > 1 + if !isempty(word_phones) + add_word!(dec,prons,word_phones; min_dist=min_dist) + word_phones = String[] + end + else + if p != "" + push!(word_phones,p) + end + end + end + if !isempty(word_phones) + add_word!(dec,prons,word_phones; min_dist=min_dist) + end + return dec +end + +export check_env +function check_env() + if !("TIDIGITS_PATH" in keys(ENV)) + @warn "ENV[\"TIDIGITS_PATH\"] not exisitng: `export TIDIGITS_PATH=path/to/dataset` to your env." + end + if !("CPU_CMD" in keys(ENV)) + @warn "ENV[\"CPU_CMD\"] not exisitng: `export CPU_CMD='...'` to your env first. Only needed for SGE." 
+ end +end + +export get_feat_dir +function get_feat_dir(setup; root="data") + uuid_folder = UUID("04a07b93-95e4-4b85-94b9-d3516eb06ea2") + conf = read("conf/$(setup)/feat_conf.jl",String) + return joinpath("data", string(uuid5(uuid_folder,conf))) +end diff --git a/WFSTs.jl b/WFSTs.jl new file mode 100644 index 0000000..ca5a9f2 --- /dev/null +++ b/WFSTs.jl @@ -0,0 +1,143 @@ +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Niccolò Antonello + +using FiniteStateTransducers + +export get_L +# builds the L transducer +function get_L(lexicon::Dict{S,Vector{S}}) where {S<:AbstractString} + phones = sort!(unique!(vcat(values(lexicon)...))) + words = sort!([keys(lexicon)...]) + psym = Dict(p => i for (i,p) in enumerate(phones)) + wsym = Dict(w => i for (i,w) in enumerate(words )) + + L = WFST(psym,wsym) + add_states!(L,2) + initial!(L,1) + final!(L,2) + ϵ = get_eps(S) + c = 3 + for w in keys(lexicon) + pron=lexicon[w] + for (i,p) in enumerate(pron) + if i == 1 && (length(pron) != 1) + add_arc!(L,1,c,p,w,1) + elseif i == length(pron) + if i == 1 + add_arc!(L,1,2,p,w) + else + add_arc!(L,c,2,p,ϵ) + c += 1 + end + else + add_arc!(L,c,c+1,p,ϵ) + c += 1 + end + end + end + add_arc!(L,2,1,ϵ,ϵ) + return L +end + +export get_H +# builds the H transucer, 2 state phone per HMM +function get_H(psym; selfloop_prob=0.4) + isym = Dict{String,Int}() + c=1 + for p in sort([keys(psym)...]) + if p == "" + isym["$(p)1"] = c + isym["$(p)2"] = c+1 + c +=2 + else + for i=1:2 + isym["$p$i"] = c + c+=1 + end + end + end + Ns = length(psym) + + H = WFST(isym, psym) + add_states!(H,Ns+1) + initial!(H,1) + ϵ = get_eps(String) + for p in sort([keys(psym)...]) + if p == "" + # from initial state, assume silence + add_arc!(H, 1 , isym["1"]+1, "$(p)1", p) + end + # this avoids trivial solution of always staying in the same state + add_arc!(H, isym["$(p)1"]+1, isym["$(p)1"]+1, "$(p)1", ϵ,-log(selfloop_prob)) + # prob of transistion to other state unknown, set to 1 + add_arc!(H, 
isym["$(p)1"]+1, isym["$(p)2"]+1, "$(p)2", ϵ) + # to final state + final!(H,isym["$(p)2"]+1) + end + for s in keys(get_final(H)) + for p in keys(psym) + # prob of transistion to other phone unknown, set to 1 + add_arc!(H, s, isym["$(p)1"]+1, "$(p)1", p) # emitting state + end + end + return H +end + +export Hfst2trans +# convert the H transducer into transition matrix +function Hfst2trans(H::WFST) + Ns = length(get_isym(H)) + A = zeros(Float32,Ns,Ns) + state2outtr=Dict(i => (get_ilabel.(s),get_weight.(s)) for (i,s) in enumerate(H)) + for (p,s,n,d,e,a) in FiniteStateTransducers.DFS(H,1) + if d + intr = get_ilabel(a) + outtr,w = state2outtr[n] + for i in eachindex(outtr) + A[intr,outtr[i]] = exp(-get(w[i])) + end + end + end + return A +end + +export get_lexicon +function get_lexicon() + lexicon = Dict( + "" => [""], + "OH" => ["OW"], + "ZERO" => ["Z", "IH", "R", "OW"], + "ONE" => ["W", "AH", "N"], + "TWO" => ["T", "UW"], + "THREE" => ["TH", "R", "IY"], + "FOUR" => ["F", "AO", "R"], + "FIVE" => ["F", "AY", "V"], + "SIX" => ["S", "IH", "KS"], + "SEVEN" => ["S", "EH", "V", "AH", "N"], + "EIGHT" => ["EY", "T"], + "NINE" => ["N", "AY", "N"] + ) + ilexicon = Dict(lexicon[w] => w for w in keys(lexicon)) + return lexicon, ilexicon +end + +export get_HL +function get_HL(lexicon) + L = get_L(lexicon) + H = get_H(get_isym(L)) + return H,L +end + +export get_aA +function get_aA(H; use_log=true) + A = Hfst2trans(H) + Ns = size(A,1) + a = zeros(Float32,Ns) # initial state prob + a[H.isym["1"]] = one(Float32) + + if use_log + A .= log.(A) + a .= log.(a) + end + return a,A +end diff --git a/check_model.jl b/check_model.jl new file mode 100644 index 0000000..c64de42 --- /dev/null +++ b/check_model.jl @@ -0,0 +1,44 @@ +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Niccolò Antonello +# +# This script is mainly for testing the model input/output works correctly +# +using HMMGradients, FiniteStateTransducers +using Random, Statistics, FileIO +using BSON + 
+setup="2a" + +include("WFSTs.jl") +include("Models.jl") +include("Utils.jl") +include("conf/$(setup)/feat_conf.jl") +include("conf/$(setup)/model_conf.jl") + +# get transition matrix +lexicon, ilexicon = get_lexicon() +H, L = get_HL(lexicon) +a, A = get_aA(H) +Ns = size(A,1) + +# init model +modely = get_convnet(Nf,Ns; + Nks=Nks, + Nhs=Nhs, + strides=strides, + dilations=dilations, + dropout=dropout, + fout=fout) + +Nt,Nb = rand(500:1000),4 +x = zeros(Float32,Nt,Nf,Nb) +t,b = 500,1 +x[t,:,b] .= 1.0 + +y = modely(x) +z = sum(y[:,:,b],dims=2) +Nt2 = ceil(Int,Nt/3) +@assert Nt2 == size(y,1) +println("Setup = $setup") +println("Num of parameters = $(sum(prod.(size.(params(modely)))))") +println("Context bins = $(subsample*sum( (!).(z .≈ z[100]) ))") diff --git a/conf/1a/feat_conf.jl b/conf/1a/feat_conf.jl new file mode 100644 index 0000000..9faaa03 --- /dev/null +++ b/conf/1a/feat_conf.jl @@ -0,0 +1,21 @@ +Fs = 16000 +Nf = 40 # input feat dim +# feature extraction (saved to data) +get_feats(x) = identity(x) +# on the fly feature processing +function feats_post(x) + Fs_original=20000 + Fs=16000 + x = load(x) + if typeof(x) <: Tuple + x = x[1] + else + x = x.data + end + x = x[:] + x = resample(x,Fs//Fs_original) + x = mfcc(x, float(Fs); + dither=true, numcep=40, nbands=40, minfreq=20.0, maxfreq=7600.0)[1] + return Float32.(x) +end +subsample = 3 # out subsampling diff --git a/conf/1a/model_conf.jl b/conf/1a/model_conf.jl new file mode 100644 index 0000000..81242e2 --- /dev/null +++ b/conf/1a/model_conf.jl @@ -0,0 +1,20 @@ + +# Model Configuration +## obs likelihood model configuration +Random.seed!(313) +Nhs = [256,256,256,256] # hidden layer dims +dilations = [1,3,5,7] +Nks = 3 .*ones(Int,length(Nhs)) # conv kernel dims +strides = ones(Int,length(Nhs)) +strides[end] = subsample # output subsampling +dropout = zeros(length(Nhs)) +fout = identity # function in last layer + +# training opts +λ1 = 1f-5 # l1 output regularization +lr = 1f-3 # learning rate +Nb = 16 # batch 
size +epochs_cur = 5 +epochs = 15 +opt = ADAM(lr) +curriculum_training = true diff --git a/conf/1b/feat_conf.jl b/conf/1b/feat_conf.jl new file mode 100644 index 0000000..9faaa03 --- /dev/null +++ b/conf/1b/feat_conf.jl @@ -0,0 +1,21 @@ +Fs = 16000 +Nf = 40 # input feat dim +# feature extraction (saved to data) +get_feats(x) = identity(x) +# on the fly feature processing +function feats_post(x) + Fs_original=20000 + Fs=16000 + x = load(x) + if typeof(x) <: Tuple + x = x[1] + else + x = x.data + end + x = x[:] + x = resample(x,Fs//Fs_original) + x = mfcc(x, float(Fs); + dither=true, numcep=40, nbands=40, minfreq=20.0, maxfreq=7600.0)[1] + return Float32.(x) +end +subsample = 3 # out subsampling diff --git a/conf/1b/model_conf.jl b/conf/1b/model_conf.jl new file mode 100644 index 0000000..7dd1f01 --- /dev/null +++ b/conf/1b/model_conf.jl @@ -0,0 +1,20 @@ + +# Model Configuration +## obs likelihood model configuration +Random.seed!(1) +Nhs = [256,256,256,256] # hidden layer dims +dilations = [1,3,5,7] +Nks = 3 .*ones(Int,length(Nhs)) # conv kernel dims +strides = ones(Int,length(Nhs)) +strides[end] = subsample # output subsampling +dropout = zeros(length(Nhs)) +fout = x -> logsoftmax(x,dims=2) # function in last layer + +# training opts +λ1 = 0f-5 # l1 output regularization +lr = 1f-4 # learning rate +Nb = 16 # batch size +epochs_cur = 5 +epochs = 15 +opt = Flux.Optimise.Optimiser(WeightDecay(1e-5),ADAM(lr)) +curriculum_training = true diff --git a/conf/2a/feat_conf.jl b/conf/2a/feat_conf.jl new file mode 100644 index 0000000..1588484 --- /dev/null +++ b/conf/2a/feat_conf.jl @@ -0,0 +1,21 @@ +Fs = 16000 +Nf = 40 # input feat dim +# feature extraction (saved to data) +function get_feats(x) + Fs_original=20000 + Fs=16000 + x = load(x) + if typeof(x) <: Tuple + x = x[1] + else + x = x.data + end + x = x[:] + x = resample(x,Fs//Fs_original) + x = mfcc(x, float(Fs); + dither=true, numcep=40, nbands=40, minfreq=20.0, maxfreq=7600.0)[1] + return Float32.(x) +end +# on the 
fly feature processing
+feats_post(x) = identity(x)
+subsample = 3 # out subsampling
diff --git a/conf/2a/model_conf.jl b/conf/2a/model_conf.jl
new file mode 100644
index 0000000..42f473c
--- /dev/null
+++ b/conf/2a/model_conf.jl
@@ -0,0 +1,20 @@
+
+# Model Configuration
+## obs likelihood model configuration
+Random.seed!(313)
+Nhs = [256,256,256,256,256,256,256,256] # hidden layer dims
+dilations = [1,1,3,3,5,5,7,11] # one dilation per layer
+Nks = 3 .*ones(Int,length(Nhs)) # conv kernel dims
+strides = ones(Int,length(Nhs)) # one stride per layer
+strides[end] = subsample # output subsampling
+dropout = zeros(length(Nhs)) # one dropout value per layer
+fout = identity # function in last layer
+
+# training opts
+λ1 = 1f-5 # l1 output regularization
+lr = 5f-4 # learning rate
+Nb = 16 # batch size
+epochs_cur = 5 # epochs of curriculum training
+epochs = 15 # epochs of training on all utterances
+opt = ADAM(lr)
+curriculum_training = true
diff --git a/conf/2b/feat_conf.jl b/conf/2b/feat_conf.jl
new file mode 100644
index 0000000..1588484
--- /dev/null
+++ b/conf/2b/feat_conf.jl
@@ -0,0 +1,24 @@
+Fs = 16000 # target sampling rate
+Nf = 40 # input feat dim
+
+"""
+    get_feats(path)
+
+Load the audio file at `path`, resample it by the ratio 16000//20000 and return
+its MFCC features as a `Float32` array (these are the features saved to data).
+"""
+function get_feats(path)
+    fs_original = 20000 # sampling rate assumed for the input recordings
+    fs_target = 16000 # local value; keep in sync with the global `Fs`
+    audio = load(path)
+    # `load` may hand back a tuple (samples first) or an object with `.data`
+    samples = audio isa Tuple ? audio[1] : audio.data
+    samples = resample(samples[:], fs_target//fs_original)
+    feats = mfcc(samples, float(fs_target);
+        dither=true, numcep=40, nbands=40, minfreq=20.0, maxfreq=7600.0)[1]
+    return Float32.(feats)
+end
+
+# on the fly feature processing
+feats_post(x) = identity(x)
+subsample = 3 # out subsampling
diff --git a/conf/2b/model_conf.jl b/conf/2b/model_conf.jl
new file mode 100644
index 0000000..aabed16
--- /dev/null
+++ b/conf/2b/model_conf.jl
@@ -0,0 +1,20 @@
+
+# Model Configuration
+## obs likelihood model configuration
+Random.seed!(11)
+Nhs = [256,256,256,256,256,256,256,256] # hidden layer dims
+dilations = [1,1,3,3,5,5,7,11] # one dilation per layer
+Nks = 3 .*ones(Int,length(Nhs)) # conv kernel dims
+strides = ones(Int,length(Nhs)) # one stride per layer
+strides[end] = subsample # output subsampling
+dropout = zeros(length(Nhs)) # one dropout value per layer
+fout = x -> logsoftmax(x,dims=2) # function in last layer
+
+# training opts
+λ1 = 0f-5 # l1 output regularization (zero here, so the penalty is off)
+lr = 1f-4 # learning rate
+Nb = 16 # batch size
+epochs_cur = 5 # epochs of curriculum training
+epochs = 15 # epochs of training on all utterances
+opt = Flux.Optimise.Optimiser(WeightDecay(1e-5),ADAM(lr))
+curriculum_training = true
diff --git a/demo.jl b/demo.jl
new file mode 100644
index 0000000..e17c897
--- /dev/null
+++ b/demo.jl
@@ -0,0 +1,66 @@
+# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
+# Niccolò Antonello
+
+using HMMGradients, Flux
+using Random, Statistics, LinearAlgebra
+using FiniteStateTransducers
+using BSON, JLD2, LibSndFile, FileIO, UUIDs
+using MFCC, DSP
+include("WFSTs.jl")
+include("Models.jl")
+include("Utils.jl")
+
+setup="2a"
+duration=5 # duration of recording in seconds
+plot_stuff=false
+
+println("
+ TIDIGIT demo
+
+ $duration seconds will be recorded
+
+ The following digits can be recognized:
+ ZERO OH ONE TWO THREE FOUR FIVE SIX SEVEN EIGHT NINE
+ (requires sox)
+ ")
+
+
+include("conf/$(setup)/feat_conf.jl")
+# get transition matrix
+lexicon, ilexicon = get_lexicon()
+H, L = get_HL(lexicon)
+a, A = get_aA(H)
+ippsym = get_iisym(H)
+
+# load the final best model that train.jl saves for this setup
+model_folder = joinpath("models","$setup")
+BSON.@load joinpath(model_folder,"best_modely_final.bson") best_modely
+Flux.testmode!(best_modely)
+
+# record `duration` seconds of single-channel audio with sox
+# NOTE(review): sox records at 16 kHz while conf/2b's get_feats resamples from
+# 20 kHz; confirm the selected setup's get_feats expects 16 kHz input.
+mkpath("data")
+file = "data/test.wav"
+run(`sox -d -r 16k -c 1 --clobber $file trim 0 $duration`)
+x = get_feats(file)
+x = feats_post(x)
+
+# acoustic model output -> logposterior -> phones -> words
+y = best_modely(Flux.unsqueeze(x,3))
+gamma = logposterior(size(y,1),a,A,y[:,:])
+phones = posterior2phones(ippsym, gamma)
+dec = phones2words_greedy(ilexicon,phones; min_dist=2)
+
+println("\nDecoded Phones")
+println(strip(prod(phones.*" ")))
+println("\nDecoded Digits")
+println(strip(prod(dec.*" ")))
+
+if plot_stuff
+    using Plots
+    p1 = heatmap(x', title="Input feats")
+    p2 = heatmap(y[:,:]', clims = (-10,0), title=strip(prod(phones.*" ")))
+    p3 = heatmap(gamma', clims = (-10,0), title=strip(prod(dec.*" ")))
+    plot(p1,p2,p3,layout=(3,1))
+end
diff --git a/eval.jl b/eval.jl
new file mode 100644
index 0000000..df8fb9c
--- /dev/null
+++ b/eval.jl
@@ -0,0 +1,79 @@
+# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
+# Niccolò Antonello
+
+using ArgParse
+
+s = ArgParseSettings()
+@add_arg_table! s begin
+    "--conf"
+        help = "configuration setup"
+        arg_type = String
+        default = "2a"
+    "--set"
+        help = "data partition to evaluate (train, test or dev)"
+        arg_type = String
+        default = "test"
+end
+parsed_args = parse_args(ARGS, s)
+setup = parsed_args["conf"]
+dataset = parsed_args["set"]
+
+using HMMGradients, Flux, Zygote
+using Random, Statistics, LinearAlgebra
+using FiniteStateTransducers
+using DSP, MFCC
+using BSON, JLD2, LibSndFile, FileIO, UUIDs
+include("WFSTs.jl")
+include("Models.jl")
+include("Utils.jl")
+
+include("conf/$(setup)/feat_conf.jl")
+
+# get transition matrix
+lexicon, ilexicon = get_lexicon()
+H, L = get_HL(lexicon)
+a, A = get_aA(H)
+ippsym = get_iisym(H)
+
+BSON.@load "models/$setup/best_modely_final.bson" best_modely
+Flux.testmode!(best_modely)
+
+# The script used to score "train.jld2", i.e. the data train.jl fits the model
+# on; the held-out test partition is now the default (`--set train` restores it).
+feat_dir = get_feat_dir(setup)
+data = load(joinpath(feat_dir,"$dataset.jld2"))
+uttID2feats, uttID2text, uttID2phones = data["uttID2feats"], data["uttID2text"], data["uttID2phones"]
+
+uttID2text_dec = Dict()
+uttID2phones_dec = Dict()
+min_dist=2
+
+# decode every utterance: model output -> logposterior -> phones -> words
+for uttID in keys(uttID2feats)
+    x = uttID2feats[uttID]
+    x = feats_post(x)
+    y = best_modely(Flux.unsqueeze(x,3))
+    gamma = logposterior(size(y,1),a,A,y[:,:])
+    ps = posterior2phones(ippsym,gamma)
+    ws = phones2words_greedy(ilexicon,ps; min_dist=min_dist)
+    uttID2phones_dec[uttID] = ps
+    uttID2text_dec[uttID] = strip(prod(ws.*" "))
+end
+
+# collect the utterances whose decoded text differs from the reference
+uttID2err_textdec = Dict{String,Tuple{String,String}}()
+for uttID in keys(uttID2text)
+    text, dec = uttID2text[uttID], uttID2text_dec[uttID]
+    if text != dec
+        uttID2err_textdec[uttID] = (text,dec)
+    end
+end
+
+accuracy = 1-length(uttID2err_textdec) / length(uttID2text)
+wer = get_error_rate(uttID2text, uttID2text_dec)
+per = get_error_rate(uttID2phones, uttID2phones_dec; is_split=true)
+println("# Setup $setup")
+println("* Data set: $dataset")
+println("* Phone Error Rate (PER): $(round(per * 100, digits=3)) %")
+println("* Word Error Rate (WER) : $(round(wer * 100, digits=3)) %")
+println("* Accuracy: $(round(accuracy,digits=3))")
diff --git a/plotstuff.jl b/plotstuff.jl
new file mode 100644
index 0000000..7971754
--- /dev/null
+++ b/plotstuff.jl
@@ -0,0 +1,61 @@
+# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
+# Niccolò Antonello
+#
+# This script can be used to check the
+# output of the acoustic model and its decoding
+# using two random utterances taken from the test set
+
+using HMMGradients, Flux, Zygote
+using Random, Statistics, LinearAlgebra
+using FiniteStateTransducers
+using DSP, MFCC
+using BSON, JLD2, LibSndFile, FileIO
+include("WFSTs.jl")
+include("Models.jl")
+include("Utils.jl")
+
+setup="2a"
+
+include("conf/$(setup)/feat_conf.jl")
+include("conf/$(setup)/model_conf.jl")
+
+# get transition matrix
+lexicon, ilexicon = get_lexicon()
+H, L = get_HL(lexicon)
+a, A = get_aA(H)
+ippsym = get_iisym(H)
+
+BSON.@load "models/$(setup)/current_modely.bson" best_modely
+Flux.testmode!(best_modely)
+
+feat_dir = get_feat_dir(setup)
+data = load(joinpath(feat_dir,"test.jld2"))
+uttID2feats, uttID2text = data["uttID2feats"], data["uttID2text"]
+
+uttIDs = [keys(uttID2text)...]
+i,j = rand(uttIDs),rand(uttIDs)
+
+xi,xj = feats_post(uttID2feats[i]), feats_post(uttID2feats[j])
+yi,yj = best_modely(Flux.unsqueeze(xi,3)),best_modely(Flux.unsqueeze(xj,3))
+
+gammai = logposterior(size(yi,1),a,A,yi[:,:])
+gammaj = logposterior(size(yj,1),a,A,yj[:,:])
+
+outi = posterior2phones(ippsym,gammai)
+outj = posterior2phones(ippsym,gammaj)
+outi[outi .== ""] .= " " # empty symbols become spaces in the plot title
+outj[outj .== ""] .= " "
+
+using Plots
+pyplot()
+psi = prod([prod(lexicon[w]) for w in split(uttID2text[i])].*" ")
+p1i = heatmap(xi', title=uttID2text[i])
+p2i = heatmap(yi[:,:]', clims = (maximum(yi)-20,maximum(yi)), title=psi) # limits from yi (was yj)
+p3i = heatmap(gammai', clims = (-20,0), title=prod(outi))
+
+psj = prod([prod(lexicon[w]) for w in split(uttID2text[j])].*" ")
+p1j = heatmap(xj', title=uttID2text[j])
+p2j = heatmap(yj[:,:]', clims = (maximum(yj)-20,maximum(yj)), title=psj)
+p3j = heatmap(gammaj', clims = (-20,0), title=prod(outj))
+
+plot(p1i,p1j,p2i,p2j,p3i,p3j,layout=(3,2))
diff --git a/prepare_data.jl b/prepare_data.jl
new file mode 100644
index 0000000..3ab56e4
--- /dev/null
+++ b/prepare_data.jl
@@ -0,0 +1,169 @@
+# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
+# Niccolò Antonello
+
+using ArgParse
+using Distributed, ClusterManagers
+include("Utils.jl")
+
+# Parse the command line: `--nj` (number of jobs) and `--conf` (setup name).
+function parse_commandline()
+    s = ArgParseSettings()
+    @add_arg_table! s begin
+        "--nj"
+            help = "number of jobs"
+            arg_type = Int
+            default = 1
+        "--conf"
+            help = "configuration setup"
+            arg_type = String
+            default = "2a"
+    end
+    return parse_args(ARGS, s)
+end
+
+# parse command line and add workers
+parsed_args = parse_commandline()
+nj, setup = parsed_args["nj"], parsed_args["conf"]
+feat_dir = get_feat_dir(setup)
+check_env()
+if ispath(feat_dir)
+    error("Data already processed for this feature conf in $(feat_dir). Remove this folder to re-run feature extraction from scratch.")
+end
+
+if nj > 1
+    addprocs_sge(nj;
+        qsub_flags=split(ENV["CPU_CMD"]),
+        wd=mktempdir(pwd()),
+        exeflags="--project"
+    )
+end
+
+@everywhere begin
+    setup = $setup
+    using DSP, MFCC, HMMGradients, FiniteStateTransducers
+    using JLD2, LibSndFile, FileIO
+    include("Utils.jl")
+    include("WFSTs.jl")
+    include("DatasetParsers.jl")
+end
+
+# Extract features, phone sequences and HMM transitions for every utterance in
+# `uttID2file`/`uttID2text`.
+# With `nj > 1` the result is saved to `<feat_dir>/q_split_<set>/<job>.jld2`
+# instead of being returned; otherwise the dictionaries are returned directly.
+# `job` names the output file of a parallel run. It defaults to the worker id,
+# but callers should pass the chunk index: a worker may process several chunks
+# and would otherwise overwrite its own file.
+@everywhere function process_data(dataset_path,lexicon,L,H,Fs,subsample,
+        uttID2file,uttID2text,feat_dir,set,nj,job=myid())
+    uttID2feats = Dict()
+    uttID2phones = Dict()
+    uttID2tr = Dict{String,Vector{Pair{Vector{Int},Vector{Int}}}}()
+    wsym = get_osym(L)
+
+    for uttID in keys(uttID2file)
+        # process audio
+        feats = get_feats(uttID2file[uttID])
+        uttID2feats[uttID] = feats # stored before feats_post is applied
+        Nt = size(feats_post(feats),1)
+
+        # process text
+        text = uttID2text[uttID]
+        uttID2phones[uttID] = text2phones(lexicon,text)
+        words = split(text;keepempty=false)
+        words = String.(vcat("",[[w,""] for w in words]...)) #silence between every word
+        S = linearfst(words,words, ones(typeofweight(L),length(words)), wsym, wsym)
+        HLS = rm_eps!(H∘(L∘S))
+        # length handed to wfst2tr (was hard-coded to a subsampling of 3)
+        Nt2 = ceil(Int,Nt/subsample)
+        time2tr = wfst2tr(HLS,Nt2)
+        uttID2tr[uttID] = HMMGradients.t2tr2t2IJ(time2tr)
+    end
+    if nj > 1
+        q = joinpath(feat_dir,"q_split_$set")
+        mkpath(q)
+        JLD2.@save joinpath(q,"$(job).jld2") uttID2feats uttID2tr uttID2phones
+    else
+        return uttID2feats, uttID2tr, uttID2text, uttID2phones
+    end
+end
+
+# Process one dataset folder (`set`) of `dataset_path`, serially or split over
+# `nj` parallel jobs, and return (uttID2feats, uttID2tr, uttID2text, uttID2phones).
+function prepare_data(dataset_path,lexicon,L,H,Fs,subsample,set,feat_dir,nj)
+    println("Processing $set set with $nj jobs")
+    uttID2file = get_uttID2file(dataset_path,set)
+    uttID2text = get_uttID2text(uttID2file)
+    if nj <= 1
+        return process_data(dataset_path,lexicon,L,H,Fs,subsample,uttID2file,uttID2text,feat_dir,set,nj)
+    end
+
+    # split utterances into nj chunks; the last chunk takes the remainder
+    uttIDs = [keys(uttID2text)...]
+    Nu = length(uttIDs)
+    delta = div(Nu,nj)
+    chunks = [Set(uttIDs[1+(i-1)*delta:(i==nj ? Nu : i*delta)]) for i = 1:nj]
+    uttID2files = [filter(p -> p.first in ids, uttID2file) for ids in chunks]
+    uttID2texts = [filter(p -> p.first in ids, uttID2text) for ids in chunks]
+    # each chunk is saved under its own index, so the merge below does not depend
+    # on which worker pmap happened to schedule it on
+    pmap(1:nj,uttID2files,uttID2texts) do job, files_job, texts_job
+        process_data(dataset_path,lexicon,L,H,Fs,subsample,
+            files_job,texts_job,feat_dir,set,nj,job)
+    end
+
+    uttID2feats = Dict()
+    uttID2phones = Dict()
+    uttID2tr = Dict{String,Vector{Pair{Vector{Int},Vector{Int}}}}()
+    println("Merging files")
+    q = joinpath(feat_dir,"q_split_$set")
+    for job in 1:nj
+        data = load(joinpath(q,"$job.jld2"))
+        merge!(uttID2feats, data["uttID2feats"])
+        merge!(uttID2phones, data["uttID2phones"])
+        merge!(uttID2tr, data["uttID2tr"])
+    end
+    rm(q;recursive=true)
+    return uttID2feats, uttID2tr, uttID2text, uttID2phones
+end
+
+###
+@everywhere begin
+    include("conf/$(setup)/feat_conf.jl")
+    lexicon, ilexicon = get_lexicon()
+    H, L = get_HL(lexicon)
+end
+
+dataset_path = ENV["TIDIGITS_PATH"]
+T = @elapsed uttID2feats_train, uttID2tr_train, uttID2text_train, uttID2phones_train =
+prepare_data(dataset_path,lexicon,L,H,Fs,subsample,"train",feat_dir,nj)
+println("Done in $T sec")
+T = @elapsed uttID2feats_test, uttID2tr_test, uttID2text_test, uttID2phones_test =
+prepare_data(dataset_path,lexicon,L,H,Fs,subsample,"test",feat_dir,nj)
+println("Done in $T sec")
+
+if nj > 1
+    t = rmprocs(workers())
+    wait(t)
+end
+
+# test data in TIDIGITS has same size of train, so we repartition it
+uttID2feats_all = merge(uttID2feats_train , uttID2feats_test )
+uttID2tr_all = merge(uttID2tr_train , uttID2tr_test )
+uttID2text_all = merge(uttID2text_train , uttID2text_test )
+uttID2phones_all= merge(uttID2phones_train, uttID2phones_test)
+uttIDs_all = [keys(uttID2text_all)...]
+Nu = length(uttIDs_all)
+idx_train, idx_test = round(Int,Nu*0.7), round(Int,Nu*0.9) # cut points at 70% and 90%
+set2uttID = Dict()
+set2uttID["train"], set2uttID["test"], set2uttID["dev"] = uttIDs_all[1:idx_train], uttIDs_all[idx_train+1:idx_test], uttIDs_all[idx_test+1:end]
+
+mkpath(feat_dir)
+for set in ("train","test","dev")
+    ids = Set(set2uttID[set]) # constant-time membership for the filters below
+    uttID2feats = filter(x -> x.first in ids, uttID2feats_all )
+    uttID2tr = filter(x -> x.first in ids, uttID2tr_all )
+    uttID2text = filter(x -> x.first in ids, uttID2text_all )
+    uttID2phones = filter(x -> x.first in ids, uttID2phones_all)
+    JLD2.@save joinpath(feat_dir,"$set.jld2") uttID2feats uttID2tr uttID2text uttID2phones
+end
diff --git a/train.jl b/train.jl
new file mode 100644
index 0000000..8affba1
--- /dev/null
+++ b/train.jl
@@ -0,0 +1,167 @@
+# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
+# Niccolò Antonello
+
+using ArgParse
+
+s = ArgParseSettings()
+@add_arg_table! s begin
+    "--conf"
+        help = "configuration setup"
+        arg_type = String
+        default = "1a"
+end
+parsed_args = parse_args(ARGS, s)
+setup = parsed_args["conf"]
+
+using HMMGradients, Flux, Zygote
+using Random, Statistics, LinearAlgebra
+using FiniteStateTransducers
+using DSP, MFCC
+using BSON, JLD2, LibSndFile, FileIO, UUIDs
+
+include("WFSTs.jl")
+include("Models.jl")
+include("Utils.jl")
+include("conf/$(setup)/feat_conf.jl")
+include("conf/$(setup)/model_conf.jl")
+
+# get transition matrix
+lexicon, ilexicon = get_lexicon()
+H, L = get_HL(lexicon)
+a, A = get_aA(H)
+Ns = size(A,1)
+
+# init model
+modely = get_convnet(Nf,Ns;
+    Nks=Nks,
+    Nhs=Nhs,
+    strides=strides,
+    dilations=dilations,
+    dropout=dropout,
+    fout=fout)
+
+# load training data
+feat_dir = get_feat_dir(setup)
+data = load(joinpath(feat_dir,"train.jld2"))
+uttID2feats, uttID2tr, uttID2text, uttID2phones =
+data["uttID2feats"], data["uttID2tr"], data["uttID2text"], data["uttID2phones"]
+# curriculum data (isolated words)
+uttID2text_cur = filter(x->length(split(x.second))==1,uttID2text)
+
+# load dev data
+data = load(joinpath(feat_dir,"dev.jld2"))
+uttID2feats_dev, uttID2tr_dev, uttID2text_dev, uttID2phones_dev =
+data["uttID2feats"], data["uttID2tr"], data["uttID2text"], data["uttID2phones"]
+# curriculum data (isolated words)
+uttID2text_dev_cur = filter(x->length(split(x.second))==1,uttID2text_dev)
+
+# dataloaders
+Xs_cur = [uttID2feats[uttID] for uttID in keys(uttID2text_cur)]
+Ys_cur = [uttID2tr[uttID] for uttID in keys(uttID2text_cur)]
+# Xs and Ys are built from one shared key list: two distinct Dicts are not
+# guaranteed to iterate their keys in the same order
+uttIDs_train = collect(keys(uttID2feats))
+Xs = [uttID2feats[uttID] for uttID in uttIDs_train]
+Ys = [uttID2tr[uttID] for uttID in uttIDs_train]
+
+Xs_test_cur = [uttID2feats_dev[uttID] for uttID in keys(uttID2text_dev_cur)]
+Ys_test_cur = [uttID2phones_dev[uttID] for uttID in keys(uttID2text_dev_cur)]
+Xs_test = [uttID2feats_dev[uttID] for uttID in keys(uttID2feats_dev)]
+Ys_test = [uttID2phones_dev[uttID] for uttID in keys(uttID2feats_dev)]
+
+N_cur = length(Xs_cur)
+N = length(Xs)
+N_test = length(Xs_test)
+
+cur_data = Flux.Data.DataLoader((Xs_cur ,Ys_cur ), batchsize=Nb, shuffle=false)
+train_data = Flux.Data.DataLoader((Xs ,Ys ), batchsize=Nb, shuffle=true)
+test_data_cur = Flux.Data.DataLoader((Xs_test_cur,Ys_test_cur), batchsize=Nb)
+test_data = Flux.Data.DataLoader((Xs_test,Ys_test), batchsize=Nb)
+
+# define maximum likelihood function
+# (evaluates the global `modely`, which train! is always called with below)
+function loss(Nt,t2tr,A,x,λ1)
+    y = modely(x)
+    yp = exp.(y)
+    f = nlogMLlog(Nt,t2tr,A,y) + λ1 * norm(yp,1)
+    return f
+end
+
+# Phone error rate of `modely` on `test_data`: summed Levenshtein distance
+# between reference and decoded phones divided by the number of reference phones.
+# The model is put in test mode for the evaluation and back in train mode after.
+function test(modely,a,A,ippsym,test_data)
+    Flux.testmode!(modely)
+    Nw = 0
+    err = 0
+    for (x,ps) in test_data
+        x = feats_post.(x)
+        # frames decoded per utterance (was hard-coded to a subsampling of 3)
+        Nts = ceil.(Int,size.(x,1)./subsample)
+        xb = zeropad(x)
+        y = modely(xb)
+        for i in eachindex(Nts)
+            gamma = logposterior(Nts[i],a,A,view(y,:,:,i))
+            ps_dec = posterior2phones(ippsym,gamma)
+            Nw += length(ps[i])
+            err += levenshtein(ps[i],ps_dec)
+        end
+    end
+    per = err / Nw
+    Flux.trainmode!(modely)
+    return per
+end
+
+# Train `modely` for `epochs` epochs, scoring it on `test_data` after each one.
+# Whenever the PER does not exceed the best one so far, the model is checkpointed
+# to models/<setup>/current_modely.bson. Returns (best_modely, modely) in test mode.
+function train!(modely,a,A,H,opt,λ1,epochs,train_data,test_data)
+    Flux.trainmode!(modely)
+    N = length(train_data.data[1])
+    ps = Flux.params(modely)
+    best_per = Inf
+    best_modely = deepcopy(modely)
+    ippsym = get_iisym(H)
+    for e in 1:epochs
+        cost = 0f0
+        for (x,t2trs) in train_data
+            x = feats_post.(x)
+            Nts = length.(t2trs) .+ 1
+            xb = zeropad(x)
+            train_loss, back =
+                Zygote.pullback(() -> loss(Nts,t2trs,A,xb,λ1), ps)
+            if !isfinite(train_loss)
+                error("Nan/Inf cost function!!")
+            end
+            cost += train_loss
+            gs = back(one(Float32))
+            Flux.update!(opt, ps, gs)
+        end
+        per = test(modely,a,A,ippsym,test_data)
+        save_best = per <= best_per
+        if save_best
+            best_modely = deepcopy(modely)
+            best_per = per
+            BSON.@save "models/$setup/current_modely.bson" best_modely
+        end
+        println("epoch: $e cost: $(round(cost/N,digits=4)) PER: $(round(per*100,digits=3))" * (save_best ? " ⋆ " : ""))
+    end
+    Flux.testmode!(best_modely)
+    Flux.testmode!(modely)
+    return best_modely, modely
+end
+
+model_folder = joinpath("models","$setup")
+mkpath(model_folder)
+println("Using setup: $setup")
+println(read("conf/$setup/model_conf.jl",String))
+if curriculum_training
+    println("Curriculum training with $N_cur isolated words")
+    best_modely, modely = train!(modely,a,A,H,opt,λ1,epochs_cur,cur_data,test_data_cur)
+    BSON.@save joinpath(model_folder,"best_modely_curriculum.bson") best_modely
+    modely = deepcopy(best_modely)
+end
+println("Training with $N utterances")
+best_modely, modely = train!(modely,a,A,H,opt,λ1,epochs,train_data,test_data)
+BSON.@save joinpath(model_folder,"best_modely_final.bson") best_modely
+BSON.@save joinpath(model_folder,"modely.bson") modely