Creative Commons Attribution 3.0 Unported license
We study the problem of learning properties of nodes in tree structures. Those properties are specified by logical formulas, such as formulas from first-order or monadic second-order logic. We think of the tree as a database encoding a large dataset and therefore aim for learning algorithms which depend at most sublinearly on the size of the tree. We present a learning algorithm for quantifier-free formulas where the running time only depends polynomially on the number of training examples, but not on the size of the background structure. By a previous result on strings we know that for general first-order or monadic second-order (MSO) formulas a sublinear running time cannot be achieved. However, we show that by building an index on the tree in a linear time preprocessing phase, we can achieve a learning algorithm for MSO formulas with a logarithmic learning phase.
@InProceedings{grienenberger_et_al:LIPIcs.ICDT.2019.24,
author = {Grienenberger, Emilie and Ritzert, Martin},
title = {{Learning Definable Hypotheses on Trees}},
booktitle = {22nd International Conference on Database Theory (ICDT 2019)},
pages = {24:1--24:18},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-101-6},
ISSN = {1868-8969},
year = {2019},
volume = {127},
editor = {Barcelo, Pablo and Calautti, Marco},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICDT.2019.24},
URN = {urn:nbn:de:0030-drops-103261},
doi = {10.4230/LIPIcs.ICDT.2019.24},
annote = {Keywords: monadic second-order logic, trees, query learning}
}