In this paper I examine the applicability of SMT methodology to part-of-speech disambiguation and lemmatization in Hungarian. After the baseline system was created, different methods and possibilities were used to improve the efficiency of the system. I also applied some methods to decrease the size of the target dictionary and to find a proper solution for handling out-of-vocabulary words. The results show that such a light-weight system achieves results comparable to those of other state-of-the-art systems.
% Conference paper: 1st Symposium on Languages, Applications and Technologies (2012),
% published in the OASIcs series, volume 21. Keywords are kept in the annote field.
@inproceedings{laki:OASIcs.SLATE.2012.267,
  author    = {Laki, L{\'a}szl{\'o}},
  title     = {Investigating the Possibilities of Using {SMT} for Text Annotation},
  booktitle = {1st Symposium on Languages, Applications and Technologies},
  pages     = {267--283},
  series    = {Open Access Series in Informatics (OASIcs)},
  isbn      = {978-3-939897-40-8},
  issn      = {2190-6807},
  year      = {2012},
  volume    = {21},
  editor    = {Sim{\~o}es, Alberto and Queir{\'o}s, Ricardo and da Cruz, Daniela},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.SLATE.2012.267},
  urn       = {urn:nbn:de:0030-drops-35285},
  doi       = {10.4230/OASIcs.SLATE.2012.267},
  annote    = {Keywords: SMT, POS-tagging, Lemmatization, Target language set, OOV},
}
Feedback for Dagstuhl Publishing