License: Creative Commons Attribution 3.0 Unported license
Having access to a large set of stories is a necessary first step for robust and wide-ranging computational narrative modeling; happily, language data, including stories, are increasingly available in electronic form. Unhappily, the process of automatically separating stories from other forms of written discourse is not straightforward and has resulted in a data collection bottleneck. Therefore, researchers have sought to develop reliable, robust automatic algorithms for identifying story text mixed with other non-story text. In this paper we report on the reimplementation and experimental comparison of two approaches to this task: Gordon's unigram classifier and Corman's semantic triplet classifier. We cross-analyze their performance on both Gordon's and Corman's corpora, discuss the similarities, differences, and gaps in the performance of these classifiers, and point the way forward to improving both approaches.
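To make the flavor of the unigram approach concrete, the following is a minimal, self-contained sketch of a story/non-story classifier that pairs bag-of-unigram features with perceptron training (the learning method named in the paper's keywords). It is not the authors' implementation of Gordon's classifier; the tokenization, feature representation, training loop, and toy data below are simplifying assumptions made only for illustration, and real experiments would use Gordon's and Corman's annotated corpora.

import re
from collections import defaultdict

def unigram_features(text):
    """Map a text to a sparse bag-of-unigrams vector (word -> count)."""
    counts = defaultdict(int)
    for token in re.findall(r"[a-z']+", text.lower()):
        counts[token] += 1
    return counts

def train_perceptron(labeled_texts, epochs=10):
    """labeled_texts: iterable of (text, label), label +1 (story) or -1 (non-story)."""
    weights = defaultdict(float)
    bias = 0.0
    for _ in range(epochs):
        for text, label in labeled_texts:
            feats = unigram_features(text)
            score = bias + sum(weights[w] * c for w, c in feats.items())
            if label * score <= 0:  # misclassified: apply the perceptron update
                for w, c in feats.items():
                    weights[w] += label * c
                bias += label
    return weights, bias

def predict(weights, bias, text):
    feats = unigram_features(text)
    score = bias + sum(weights[w] * c for w, c in feats.items())
    return 1 if score > 0 else -1  # +1 = story, -1 = non-story

# Toy usage with hypothetical examples.
train = [
    ("Once upon a time a girl walked into the forest and met a wolf.", 1),
    ("The quarterly report shows revenue increased by four percent.", -1),
]
w, b = train_perceptron(train)
print(predict(w, b, "The knight rode out at dawn and fought the dragon."))

Corman's semantic triplet classifier differs by operating over extracted (agent, verb, patient) triplets rather than raw word counts, which is why the paper compares the two on both corpora rather than treating them as interchangeable.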
@InProceedings{eisenberg_et_al:OASIcs.CMN.2016.6,
author = {Eisenberg, Joshua D. and Yarlott, W. Victor H. and Finlayson, Mark A.},
title = {{Comparing Extant Story Classifiers: Results \& New Directions}},
booktitle = {7th Workshop on Computational Models of Narrative (CMN 2016)},
pages = {6:1--6:10},
series = {Open Access Series in Informatics (OASIcs)},
ISBN = {978-3-95977-020-0},
ISSN = {2190-6807},
year = {2016},
volume = {53},
editor = {Miller, Ben and Lieto, Antonio and Ronfard, R\'{e}mi and Ware, Stephen G. and Finlayson, Mark A.},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.CMN.2016.6},
URN = {urn:nbn:de:0030-drops-67079},
doi = {10.4230/OASIcs.CMN.2016.6},
annote = {Keywords: Story Detection, Machine Learning, Natural Language Processing, Perceptron Learning}
}