<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
 <record>
  <leader>03011ntm a22005657i 4500</leader>
  <controlfield tag="001">000723044</controlfield>
  <controlfield tag="003">CZ-PrVSE</controlfield>
  <controlfield tag="005">20250621144446.0</controlfield>
  <controlfield tag="006">m        d</controlfield>
  <controlfield tag="007">cr n||||||||||</controlfield>
  <controlfield tag="008">250621s2025    xr     fsbm   000 0 eng d</controlfield>
  <datafield tag="STA" ind1=" " ind2=" ">
   <subfield code="a">NEZPRACOVANÝ IMPORT</subfield>
  </datafield>
  <datafield tag="040" ind1=" " ind2=" ">
   <subfield code="a">ABA006</subfield>
   <subfield code="b">cze</subfield>
   <subfield code="c">ABA006</subfield>
   <subfield code="d">ABA006</subfield>
   <subfield code="e">rda</subfield>
  </datafield>
  <datafield tag="100" ind1="1" ind2=" ">
   <subfield code="a">Frolov, Ilia</subfield>
   <subfield code="%">ISIS:162517</subfield>
   <subfield code="4">dis</subfield>
  </datafield>
  <datafield tag="242" ind1="1" ind2="0">
   <subfield code="a">Využití syntetických textových dat pro trénování modelů analýzy sentimentu</subfield>
   <subfield code="y">eng</subfield>
  </datafield>
  <datafield tag="245" ind1="1" ind2="0">
   <subfield code="a">USING SYNTHETIC TEXT DATA TO TRAIN SENTIMENT ANALYSIS MODELS /</subfield>
   <subfield code="c">Ilia Frolov</subfield>
  </datafield>
  <datafield tag="264" ind1=" " ind2="0">
   <subfield code="c">2025</subfield>
  </datafield>
  <datafield tag="300" ind1=" " ind2=" ">
   <subfield code="a">?? stran :</subfield>
   <subfield code="3">digital, PDF soubor</subfield>
  </datafield>
  <datafield tag="500" ind1=" " ind2=" ">
   <subfield code="a">Vedoucí práce: David Chudán</subfield>
  </datafield>
  <datafield tag="502" ind1=" " ind2=" ">
   <subfield code="a">Bakalářská práce (Bc.)—Vysoká škola ekonomická v Praze. Fakulta informatiky a statistiky, 2025</subfield>
  </datafield>
  <datafield tag="504" ind1=" " ind2=" ">
   <subfield code="a">Obsahuje bibliografii</subfield>
  </datafield>
  <datafield tag="516" ind1=" " ind2=" ">
   <subfield code="a">Textový (vysokoškolská kvalifikační práce)</subfield>
  </datafield>
  <datafield tag="518" ind1=" " ind2=" ">
   <subfield code="a">Rok obhajoby 2025</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
   <subfield code="a">The explosive growth of digital text data presents many opportunities and challenges, especially in the area of sentiment analysis. A common problem is the imbalance of datasets, where one sentiment class significantly outperforms others, resulting in prediction bias. In this study, the class imbalance problem is addressed by creating synthetic data using GPT-2, a state-of-the-art generative language model. Comprehensive experiments were conducted using logistic regression, long term short-term memory (LSTM), and bidirectional encoding representations from transformers (BERT) models. The evaluation showed significant results, in particular BERT, which achieved accuracies up to 74.2% when trained solely on synthetic data. This highlights the value of using synthetic data in NLP applications. The study suggests opportunities for future research using more advanced language models and broader NLP applications.</subfield>
  </datafield>
  <datafield tag="538" ind1=" " ind2=" ">
   <subfield code="a">Způsob přístupu: Internet</subfield>
  </datafield>
  <datafield tag="653" ind1="0" ind2=" ">
   <subfield code="a">data analytics [obor bakal. práce]</subfield>
  </datafield>
  <datafield tag="655" ind1=" " ind2="7">
   <subfield code="a">bakalářské práce</subfield>
   <subfield code="7">fd132403</subfield>
   <subfield code="2">czenas</subfield>
  </datafield>
  <datafield tag="655" ind1=" " ind2="9">
   <subfield code="a">bachelor's theses</subfield>
   <subfield code="2">eczenas</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Synthetic data</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">GPT-2</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Sentiment analysis</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Logistic Regression</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">LSTM</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">BERT</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Class imbalance</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">Data augmentation</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2=" ">
   <subfield code="a">NLP</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Chudán, David</subfield>
   <subfield code="7">js20100609002</subfield>
   <subfield code="4">ths</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Vencovský, Filip</subfield>
   <subfield code="7">mzk2015883325</subfield>
   <subfield code="4">opn</subfield>
  </datafield>
  <datafield tag="710" ind1="2" ind2=" ">
   <subfield code="a">Vysoká škola ekonomická v Praze.</subfield>
   <subfield code="b">Fakulta informatiky a statistiky</subfield>
   <subfield code="7">kn20010709399</subfield>
   <subfield code="4">dgg</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/90135/podrobnosti</subfield>
   <subfield code="y">VŠKP v InSIS</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/90135</subfield>
   <subfield code="y">Hlavní práce</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/90135/posudek/vedouci</subfield>
   <subfield code="y">Hodnocení vedoucího</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/90135/posudek/oponent/86260</subfield>
   <subfield code="y">Oponentura</subfield>
  </datafield>
  <datafield tag="999" ind1="4" ind2="0">
   <subfield code="u">https://insis.vse.cz/zp/90135/podrobnosti</subfield>
   <subfield code="y">dc:identifier</subfield>
  </datafield>
  <datafield tag="993" ind1=" " ind2=" ">
   <subfield code="x">NEPOSILAT</subfield>
   <subfield code="y">VSKP</subfield>
  </datafield>
  <datafield tag="999" ind1="4" ind2="9">
   <subfield code="a">vse90135</subfield>
   <subfield code="b">250617</subfield>
  </datafield>
  <datafield tag="999" ind1="4" ind2="5">
   <subfield code="x">90135</subfield>
  </datafield>
 </record>
</collection>
