<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
 <record>
  <leader>03649ntm a22006377i 4500</leader>
  <controlfield tag="001">000260329</controlfield>
  <controlfield tag="003">CZ-PrVSE</controlfield>
  <controlfield tag="005">20220604071223.0</controlfield>
  <controlfield tag="006">m        d</controlfield>
  <controlfield tag="007">cr n||||||||||</controlfield>
  <controlfield tag="008">220604s2022    xr     fsbm   000 0 eng d</controlfield>
  <datafield tag="STA" ind1=" " ind2=" ">
   <subfield code="a">NEZPRACOVANÝ IMPORT</subfield>
  </datafield>
  <datafield tag="040" ind1=" " ind2=" ">
   <subfield code="a">ABA006</subfield>
   <subfield code="b">cze</subfield>
   <subfield code="c">ABA006</subfield>
   <subfield code="d">ABA006</subfield>
   <subfield code="e">rda</subfield>
  </datafield>
  <datafield tag="100" ind1="1" ind2=" ">
   <subfield code="a">Kočí, Ondřej</subfield>
   <subfield code="%">ISIS:133117</subfield>
   <subfield code="4">dis</subfield>
  </datafield>
  <datafield tag="242" ind1="1" ind2="0">
   <subfield code="a">Extrakce prozódických vlastností a syntéza zpěvu pomocí end-to-end sekvence neurálních modulů</subfield>
   <subfield code="y">eng</subfield>
  </datafield>
  <datafield tag="245" ind1="1" ind2="0">
   <subfield code="a">Prosodic feature extraction and singing voice synthesis with an End-to-end Neural network model sequence /</subfield>
   <subfield code="c">Ondřej Kočí</subfield>
  </datafield>
  <datafield tag="264" ind1=" " ind2="0">
   <subfield code="c">2022</subfield>
  </datafield>
  <datafield tag="300" ind1=" " ind2=" ">
   <subfield code="a">?? stran :</subfield>
   <subfield code="3">digital, PDF soubor</subfield>
  </datafield>
  <datafield tag="500" ind1=" " ind2=" ">
   <subfield code="a">Vedoucí práce: Jan Mittner</subfield>
  </datafield>
  <datafield tag="502" ind1=" " ind2=" ">
   <subfield code="a">Diplomová práce (Ing.)—Vysoká škola ekonomická v Praze. Fakulta informatiky a statistiky, 2022</subfield>
  </datafield>
  <datafield tag="504" ind1=" " ind2=" ">
   <subfield code="a">Obsahuje bibliografii</subfield>
  </datafield>
  <datafield tag="516" ind1=" " ind2=" ">
   <subfield code="a">Textový (vysokoškolská kvalifikační práce)</subfield>
  </datafield>
  <datafield tag="518" ind1=" " ind2=" ">
   <subfield code="a">Rok obhajoby 2022</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
   <subfield code="a">Prosody is an intrinsic aspect of human speech. However, most popular speech synthesizers ignore it or only use its average representation when synthesizing artificial voices. This thesis proposes a new End-to-End model sequence prototype based on the Deep Neural Network architecture. Utilizing Mellotron (Rafael Valle et al. 2019), Tacotron (Yuxuan Wang et al. 2017), WaveGlow (Ryan Prenger et al. 2018) and other neural models, it can synthesize waveforms with prosody by extracting it from a source audio clip and applying it to synthesized audio. Unlike other models, this prototype uses a popular framework Nvidia NeMo for neural module development (Oleksii Kuchaiev et al. 2019). The prototype can synthesize singing voice clips with pitch and rhythm transfer, which allows it to preserve original characteristics of the source speaker. The prototype also works with datasets based solely on speaking utterances, and therefore does not require a singing corpus to train. This prototype serves as a proof of concept for real-time singing voice synthesis with advanced control over prosodic features supplied via a source audio implemented largely using the models provided by the Nvidia NeMo framework.</subfield>
  </datafield>
  <datafield tag="538" ind1=" " ind2=" ">
   <subfield code="a">Způsob přístupu: Internet</subfield>
  </datafield>
  <datafield tag="653" ind1="0" ind2=" ">
   <subfield code="a">vývoj informačních systémů [obor dipl. práce]</subfield>
  </datafield>
  <datafield tag="655" ind1=" " ind2="7">
   <subfield code="a">diplomové práce</subfield>
   <subfield code="7">fd132022</subfield>
   <subfield code="2">czenas</subfield>
  </datafield>
  <datafield tag="655" ind1=" " ind2="9">
   <subfield code="a">master's theses</subfield>
   <subfield code="2">eczenas</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Mellotron</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Tacotron</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Neural model</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Neural networks</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">AI</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Convolution</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Neuron</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">NeMo</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Prosody</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Prosodic feature</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Singing</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Neural module</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Mittner, Jan</subfield>
   <subfield code="%">ISIS:40388</subfield>
   <subfield code="4">ths</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Polák, Petr</subfield>
   <subfield code="4">opn</subfield>
  </datafield>
  <datafield tag="710" ind1="2" ind2=" ">
   <subfield code="a">Vysoká škola ekonomická v Praze.</subfield>
   <subfield code="b">Fakulta informatiky a statistiky</subfield>
   <subfield code="7">kn20010709399</subfield>
   <subfield code="4">dgg</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/78588/podrobnosti</subfield>
   <subfield code="y">VŠKP v InSIS</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/78588</subfield>
   <subfield code="y">Hlavní práce</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/78588/posudek/vedouci</subfield>
   <subfield code="y">Hodnocení vedoucího</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/78588/posudek/oponent/74056</subfield>
   <subfield code="y">Oponentura</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/78588/priloha/23796</subfield>
   <subfield code="y">Přiloha k práci</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/78588/priloha/23797</subfield>
   <subfield code="y">Přiloha k práci</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/78588/priloha/23824</subfield>
   <subfield code="y">Přiloha k práci</subfield>
  </datafield>
  <datafield tag="999" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/78588/podrobnosti</subfield>
   <subfield code="y">dc:identifier</subfield>
  </datafield>
  <datafield tag="993" ind1=" " ind2=" ">
   <subfield code="x">NEPOSILAT</subfield>
   <subfield code="y">VSKP</subfield>
  </datafield>
  <datafield tag="999" ind1="4" ind2="9">
   <subfield code="a">vse78588</subfield>
   <subfield code="b">220603</subfield>
  </datafield>
  <datafield tag="999" ind1="4" ind2="5">
   <subfield code="x">78588</subfield>
  </datafield>
 </record>
</collection>
