Information extraction from semi-structured documents requires finding n-ary queries in trees that define appropriate sets of n-tuples of nodes. We propose new representation formalisms for n-ary queries by tree automata that we prove to capture MSO. We then investigate n-ary queries by unambiguous tree automata, which are relevant for query induction in multi-slot information extraction. We show that this representation formalism captures the class of n-ary queries that are finite unions of Cartesian closed queries, a property we prove decidable.
% Paper in the Dagstuhl Seminar Proceedings series (volume 5061, 2005).
% Only the case-sensitive word of the title is brace-protected, so the
% bibliography style remains free to apply its own title casing.
@inproceedings{niehren_et_al:DagSemProc.05061.5,
  author    = {Niehren, Joachim and Planque, Laurent and Talbot, Jean-Marc and Tison, Sophie},
  title     = {{N-ary} Queries by Tree Automata},
  booktitle = {Foundations of Semistructured Data},
  editor    = {Neven, Frank and Schwentick, Thomas and Suciu, Dan},
  series    = {Dagstuhl Seminar Proceedings (DagSemProc)},
  volume    = {5061},
  pages     = {1--15},
  year      = {2005},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  issn      = {1862-4405},
  doi       = {10.4230/DagSemProc.05061.5},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/DagSemProc.05061.5},
  urn       = {urn:nbn:de:0030-drops-2263},
  annote    = {Keywords: Information extraction, semistructured documents, node selecting queries in trees},
}
Feedback for Dagstuhl Publishing