Having access to a large set of stories is a necessary first step for robust and wide-ranging computational narrative modeling; happily, language data — including stories — are increasingly available in electronic form. Unhappily, the process of automatically separating stories from other forms of written discourse is not straightforward, and has resulted in a data collection bottleneck. Therefore, researchers have sought to develop reliable, robust automatic algorithms for identifying story text mixed with other non-story text. In this paper we report on the reimplementation and experimental comparison of the two approaches to this task: Gordon's unigram classifier, and Corman's semantic triplet classifier. We cross-analyze their performance on both Gordon's and Corman's corpora, discuss similarities, differences, and gaps in the performance of these classifiers, and point the way forward to improving their approaches.
% Workshop paper in the CMN 2016 proceedings (OASIcs series, volume 53).
% The title is stored in Title Case without whole-title braces so that the
% bibliography style decides the final casing.
@inproceedings{eisenberg_et_al:OASIcs.CMN.2016.6,
  author    = {Eisenberg, Joshua D. and Yarlott, W. Victor H. and Finlayson, Mark A.},
  title     = {Comparing Extant Story Classifiers: Results \& New Directions},
  booktitle = {7th Workshop on Computational Models of Narrative ({CMN} 2016)},
  editor    = {Miller, Ben and Lieto, Antonio and Ronfard, R{\'e}mi and Ware, Stephen G. and Finlayson, Mark A.},
  series    = {Open Access Series in Informatics ({OASIcs})},
  volume    = {53},
  pages     = {6:1--6:10},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2016},
  isbn      = {978-3-95977-020-0},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.CMN.2016.6},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.CMN.2016.6},
  urn       = {urn:nbn:de:0030-drops-67079},
  annote    = {Keywords: Story Detection, Machine Learning, Natural Language Processing, Perceptron Learning},
}
Feedback for Dagstuhl Publishing