In this research we explore a terminological database (Termoteca) in order to expand the Portuguese and Galician wordnets (PULO and Galnet) with new synset variants (word forms for a concept), usage examples for the variants, and synset glosses or definitions. The methodology applied in this experiment is based on the alignment between WordNet concepts (synsets) and the concepts described in Termoteca (terminological records). The alignment takes into account the lexical forms in both resources, their morphological category and their knowledge domains, using the information provided by the WordNet Domains Hierarchy and the Termoteca field domains to reduce the incidence of polysemy and homography in the results of the experiment. The results obtained confirm our hypothesis that the combined use of the semantic domain information included in both resources makes it possible to minimise the problem of lexical ambiguity and to obtain a very acceptable level of precision in terminological information extraction tasks: precision exceeds 89% when the Galnet synset and the Termoteca record share at least one lexical form in two or more different languages.
% SLATE 2019 paper published in OASIcs volume 74 (article 6, pages 6:1--6:13).
% Accented letters use the brace-wrapped "special character" form so that classic
% BibTeX sorts and labels the names correctly; only case-sensitive words in the
% title are brace-protected so bibliography styles can still apply their own casing.
@inproceedings{simoes_et_al:OASIcs.SLATE.2019.6,
  author    = {Sim{\~o}es, Alberto and G{\'o}mez Guinovart, Xavier},
  title     = {Acquiring Domain-Specific Knowledge for {WordNet} from a Terminological Database},
  booktitle = {8th Symposium on Languages, Applications and Technologies ({SLATE} 2019)},
  editor    = {Rodrigues, Ricardo and Janou{\v{s}}ek, Jan and Ferreira, Lu{\'\i}s and Coheur, Lu{\'\i}sa and Batista, Fernando and Gon{\c{c}}alo Oliveira, Hugo},
  series    = {Open Access Series in Informatics ({OASIcs})},
  volume    = {74},
  pages     = {6:1--6:13},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2019},
  isbn      = {978-3-95977-114-6},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.SLATE.2019.6},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.SLATE.2019.6},
  urn       = {urn:nbn:de:0030-drops-108735},
  annote    = {Keywords: WordNet, Terminology, Lexical Resources, Natural Language Processing},
}
Feedback for Dagstuhl Publishing