With the new interest in historical documents, the insight has grown that electronic access to these texts causes many specific problems. In the first part of the paper, we survey the present role of digital historical documents. After collecting central facts and observations on historical language change, we comment on the difficulties that result for retrieval and data mining on historical texts. In the second part of the paper, we report on our own work in the area, with a focus on special matching strategies that help to relate modern-language keywords to old variants. The basis of our studies is a collection of documents from the Early New High German period. These texts come with a very rich spectrum of word variants and spelling variations.
% Paper in the Dagstuhl Seminar Proceedings, seminar 06491. Names use the "Last, First" form;
% only the proper-noun phrase in the title is brace-protected so styles may recase the rest.
@inproceedings{hauser_et_al:DagSemProc.06491.9,
  author    = {Hauser, Andreas and Heller, Markus and Leiss, Elisabeth and Schulz, Klaus U. and Wanzeck, Christiane},
  title     = {Information Access to Historical Documents from the {Early New High German} Period},
  booktitle = {Digital Historical Corpora -- Architecture, Annotation, and Retrieval},
  editor    = {Burnard, Lou and Dobreva, Milena and Fuhr, Norbert and L{\"u}deling, Anke},
  series    = {Dagstuhl Seminar Proceedings (DagSemProc)},
  volume    = {6491},
  pages     = {1--8},
  year      = {2007},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  issn      = {1862-4405},
  doi       = {10.4230/DagSemProc.06491.9},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/DagSemProc.06491.9},
  urn       = {urn:nbn:de:0030-drops-10573},
  annote    = {Keywords: Historical documents, information access, Early New High German, historical language, information retrieval, word similarity, approximate matching},
}
Feedback for Dagstuhl Publishing