The task of paraphrase identification has been applied to diverse scenarios in Natural Language Processing, such as machine translation, summarization, or plagiarism detection. In this paper, we present a comparative study on the performance of lexical, syntactic and semantic features in the task of paraphrase identification on the Microsoft Research Paraphrase Corpus. In our experiments, semantic features do not represent a gain in results, and syntactic features lead to the best results, but only if combined with lexical features.
@comment{
  Conference paper in the SLATE 2019 proceedings (OASIcs series, volume 74).
  The title is not double-braced, so bibliography styles may apply their own
  casing. Accented letters are brace-wrapped special characters so that sorting
  and label generation treat each as a single letter. The urn field is not a
  standard field and is kept only as an extra identifier.
}
@inproceedings{fialho_et_al:OASIcs.SLATE.2019.9,
  author    = {Fialho, Pedro and Coheur, Lu{\'\i}sa and Quaresma, Paulo},
  title     = {From Lexical to Semantic Features in Paraphrase Identification},
  booktitle = {8th Symposium on Languages, Applications and Technologies ({SLATE} 2019)},
  editor    = {Rodrigues, Ricardo and Janou{\v{s}}ek, Jan and Ferreira, Lu{\'\i}s and Coheur, Lu{\'\i}sa and Batista, Fernando and Gon{\c{c}}alo Oliveira, Hugo},
  series    = {Open Access Series in Informatics ({OASIcs})},
  volume    = {74},
  pages     = {9:1--9:11},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2019},
  isbn      = {978-3-95977-114-6},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.SLATE.2019.9},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.SLATE.2019.9},
  urn       = {urn:nbn:de:0030-drops-108763},
  annote    = {Keywords: paraphrase identification, lexical features, syntactic features, semantic features},
}
Feedback for Dagstuhl Publishing