Having access to a large set of stories is a necessary first step for robust and wide-ranging computational narrative modeling; happily, language data, including stories, are increasingly available in electronic form. Unhappily, the process of automatically separating stories from other forms of written discourse is not straightforward, and this has resulted in a data collection bottleneck. Therefore, researchers have sought to develop reliable, robust automatic algorithms for identifying story text mixed with other non-story text. In this paper we report on the reimplementation and experimental comparison of two approaches to this task: Gordon's unigram classifier and Corman's semantic triplet classifier. We cross-analyze their performance on both Gordon's and Corman's corpora, discuss similarities, differences, and gaps in the performance of these classifiers, and point the way forward to improving both approaches.
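To make the task concrete, the sketch below shows one minimal way a unigram-based story/non-story classifier trained with perceptron learning might be set up. It is an illustrative assumption only, combining the "unigram" and "Perceptron Learning" terms that appear in the entry below; it does not reproduce Gordon's or Corman's actual features, learning algorithms, or corpora, and the tokenizer and toy examples are hypothetical.

from collections import Counter, defaultdict

def unigram_features(text):
    # Lowercased bag-of-words counts; a stand-in for a real tokenizer.
    return Counter(text.lower().split())

class Perceptron:
    # Sparse binary perceptron over unigram features (story = +1, non-story = -1).

    def __init__(self):
        self.weights = defaultdict(float)
        self.bias = 0.0

    def score(self, feats):
        return sum(self.weights[f] * v for f, v in feats.items()) + self.bias

    def predict(self, feats):
        return 1 if self.score(feats) > 0 else -1

    def train(self, data, epochs=10):
        # data: list of (text, label) pairs with label in {+1, -1}.
        for _ in range(epochs):
            for text, label in data:
                feats = unigram_features(text)
                if self.predict(feats) != label:
                    # Standard perceptron update on a mistake.
                    for f, v in feats.items():
                        self.weights[f] += label * v
                    self.bias += label

# Toy usage with made-up snippets (not drawn from either paper's corpora).
train = [
    ("once upon a time a girl walked into the forest and met a wolf", 1),
    ("the quarterly report shows revenue increased by three percent", -1),
]
clf = Perceptron()
clf.train(train)
print(clf.predict(unigram_features("the knight rode into the village at dawn")))

In practice, a story classifier of this kind would add at least tokenization, feature weighting, and cross-validation over labeled corpora; the point here is only to illustrate the unigram-plus-perceptron pattern named in the keywords.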
@InProceedings{eisenberg_et_al:OASIcs.CMN.2016.6,
  author =	{Eisenberg, Joshua D. and Yarlott, W. Victor H. and Finlayson, Mark A.},
  title =	{{Comparing Extant Story Classifiers: Results \& New Directions}},
  booktitle =	{7th Workshop on Computational Models of Narrative (CMN 2016)},
  pages =	{6:1--6:10},
  series =	{Open Access Series in Informatics (OASIcs)},
  ISBN =	{978-3-95977-020-0},
  ISSN =	{2190-6807},
  year =	{2016},
  volume =	{53},
  editor =	{Miller, Ben and Lieto, Antonio and Ronfard, R\'{e}mi and Ware, Stephen G. and Finlayson, Mark A.},
  publisher =	{Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address =	{Dagstuhl, Germany},
  URL =		{https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.CMN.2016.6},
  URN =		{urn:nbn:de:0030-drops-67079},
  doi =		{10.4230/OASIcs.CMN.2016.6},
  annote =	{Keywords: Story Detection, Machine Learning, Natural Language Processing, Perceptron Learning}
}