Creative Commons Attribution 4.0 International license
The design of TimeML as an expressive language for temporal information brings promises, and challenges; in particular, its representational properties raise the bar for traditional information extraction methods applied to the task of text-to-TimeML analysis. A reference corpus, such as TimeBank, is an invaluable asset in this situation; however, certain characteristics of TimeBank---size and consistency, primarily---present challenges of their own. We discuss the design, implementation, and performance of an automatic TimeML-compliant annotator, trained on TimeBank, and deploying a hybrid analytical strategy of mixing aggressive finite-state processing over linguistic annotations with a state-of-the-art machine learning technique capable of leveraging large amounts of unannotated data. The results we report are encouraging in the light of a close analysis of TimeBank; at the same time they are indicative of the need for more infrastructure work, especially in the direction of creating a larger and more robust reference corpus.
% Paper in the Dagstuhl Seminar Proceedings series. Only the proper nouns in
% the title are brace-protected, so a bibliography style stays free to apply
% its own casing to the remaining words.
@InProceedings{boguraev_et_al:DagSemProc.05151.11,
  author    = {Boguraev, Branimir and Ando, Rie Kubota},
  title     = {{TimeBank}-Driven {TimeML} Analysis},
  booktitle = {Annotating, Extracting and Reasoning about Time and Events},
  editor    = {Katz, Graham and Pustejovsky, James and Schilder, Frank},
  pages     = {1--22},
  series    = {Dagstuhl Seminar Proceedings (DagSemProc)},
  volume    = {5151},
  issn      = {1862-4405},
  year      = {2005},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  doi       = {10.4230/DagSemProc.05151.11},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/DagSemProc.05151.11},
  urn       = {urn:nbn:de:0030-drops-3354},
  annote    = {Keywords: TimeML analysis, TimeBank corpus, TimeML-compliant temporal information extraction, finite-state processing, machine learning, corpus analysis},
}