Even in the 21st century, paper dictionaries are still compiled and developed using standard word processors. Many publishing companies are nowadays working on converting their dictionaries into computer-readable documents, so that they can be used to prepare new features, such as making them available online. Luckily, most of these publishers can pay review teams to fix and even enhance these dictionaries. Unfortunately, research institutions cannot hire that many workers. In this article we present the process of retreading a Galician dictionary that was first developed and compiled using Microsoft Word. This dictionary was converted, through automatic rewriting, into a subset of the Text Encoding Initiative schema. This process will be detailed, and the problems found will be discussed. Because a recent normative reform changed Galician orthography, the dictionary has also undergone a semi-automatic modernization process. Finally, two applications for the resulting dictionaries will be shown.
@comment{
  SLATE 2013 paper published in OASIcs volume 29.
  Accented letters use the BibTeX special-character form (a brace group
  whose first token is the accent command) so sorting and label generation
  treat each one as a single letter. The title carries only its field
  delimiters so that bibliography styles can apply their own casing.
  NOTE(review): the first author's surname may be the compound
  "Gomez Guinovart" rather than "Guinovart" alone; the name split below is
  kept exactly as exported. Confirm against the paper before changing it.
}
@inproceedings{guinovart_et_al:OASIcs.SLATE.2013.115,
  author    = {Guinovart, Xavier G{\'o}mez and Sim{\~o}es, Alberto},
  title     = {Retreading Dictionaries for the 21st Century},
  booktitle = {2nd Symposium on Languages, Applications and Technologies},
  editor    = {Leal, Jos{\'e} Paulo and Rocha, Ricardo and Sim{\~o}es, Alberto},
  series    = {Open Access Series in Informatics (OASIcs)},
  volume    = {29},
  pages     = {115--126},
  year      = {2013},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-939897-52-1},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.SLATE.2013.115},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.SLATE.2013.115},
  urn       = {urn:nbn:de:0030-drops-40333},
  annote    = {Keywords: dictionary, markup language, language processing, lexical information retrieval, Galician language},
}
Feedback for Dagstuhl Publishing