The design of TimeML as an expressive language for temporal information brings promises, and challenges; in particular, its representational properties raise the bar for traditional information extraction methods applied to the task of text-to-TimeML analysis. A reference corpus, such as TimeBank, is an invaluable asset in this situation; however, certain characteristics of TimeBank---size and consistency, primarily---present challenges of their own. We discuss the design, implementation, and performance of an automatic TimeML-compliant annotator, trained on TimeBank, and deploying a hybrid analytical strategy of mixing aggressive finite-state processing over linguistic annotations with a state-of-the-art machine learning technique capable of leveraging large amounts of unannotated data. The results we report are encouraging in the light of a close analysis of TimeBank; at the same time they are indicative of the need for more infrastructure work, especially in the direction of creating a larger and more robust reference corpus.
% BibTeX record for the Dagstuhl Seminar Proceedings paper by Boguraev and Ando (2005).
% Only the case-sensitive names in the title are brace-protected, so styles may recase the rest.
@InProceedings{boguraev_et_al:DagSemProc.05151.11,
  author    = {Boguraev, Branimir and Ando, Rie Kubota},
  title     = {{TimeBank}-Driven {TimeML} Analysis},
  booktitle = {Annotating, Extracting and Reasoning about Time and Events},
  editor    = {Katz, Graham and Pustejovsky, James and Schilder, Frank},
  series    = {Dagstuhl Seminar Proceedings (DagSemProc)},
  volume    = {5151},
  pages     = {1--22},
  year      = {2005},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  issn      = {1862-4405},
  doi       = {10.4230/DagSemProc.05151.11},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/DagSemProc.05151.11},
  urn       = {urn:nbn:de:0030-drops-3354},
  annote    = {Keywords: TimeML analysis, TimeBank corpus, TimeML-compliant temporal information extraction, finite-state processing, machine learning, corpus analysis},
}
Feedback for Dagstuhl Publishing