Další formáty:
BibTeX
LaTeX
RIS
% Workshop paper published in the RASLAN 2022 proceedings (Tribun EU, Brno).
% The numeric citation key is kept exactly as exported so existing \cite{53147} calls still resolve.
% `language` and `howpublished` are carried over verbatim from the export.
% NOTE(review): standard inproceedings styles do not print `howpublished` -- confirm whether it is still needed.
@inproceedings{53147,
  author       = {Sotolář, Ondřej and Plhák, Jaromír and Tkaczyk, Michal and Lebedíková, Michaela and Šmahel, David},
  title        = {Constructing Datasets from Dialogue Data},
  booktitle    = {Proceedings of the 16th Workshop on Recent Advances in {Slavonic} Natural Languages Processing, {RASLAN} 2022},
  year         = {2022},
  pages        = {131--139},
  publisher    = {Tribun EU},
  address      = {Brno},
  isbn         = {978-80-263-1752-4},
  url          = {https://nlp.fi.muni.cz/raslan/raslan22.pdf#page=141},
  language     = {eng},
  keywords     = {Dialogue Dataset;Dataset Split;Online Conversations},
  howpublished = {tištěná verze "print"},
}
TY  - JOUR
ID  - 53147
AU  - Sotolář, Ondřej
AU  - Plhák, Jaromír
AU  - Tkaczyk, Michal
AU  - Lebedíková, Michaela
AU  - Šmahel, David
PY  - 2022
TI  - Constructing Datasets from Dialogue Data
PB  - Tribun EU
CY  - Brno
SN  - 9788026317524
KW  - Dialogue Dataset;Dataset Split;Online Conversations
UR  - https://nlp.fi.muni.cz/raslan/raslan22.pdf#page=141
N2  - We present methods for transforming raw dialogue data into a dataset suitable for processing with statistical NLP models. We reveal the potential pitfalls for processing this type of data, such as ensuring the representatives of the sample, the generalization ability of models, and the definition of the local context of the utterances. We use novel methods to solve these problems and demonstrate their effectiveness on an utterance classification problem. As a result, this paper provides guidelines for generating valuable datasets from dialogue data.
ER  - 
SOTOLÁŘ, Ondřej, Jaromír PLHÁK, Michal TKACZYK, Michaela LEBEDÍKOVÁ a David ŠMAHEL. Constructing Datasets from Dialogue Data. In \textit{Proceedings of the 16th Workshop on Recent Advances in Slavonic Natural Languages Processing, RASLAN 2022}. Brno: Tribun EU, 2022, s.~131--139. ISBN~978-80-263-1752-4.
|