We investigate the problem of constructing a shape graph that describes the structure of a given graph database. We employ the framework of grammatical inference, where the objective is to find an inference algorithm that is both sound, i.e., always producing a schema that validates the input graph, and complete, i.e., able to produce any schema, within a given class of schemas, provided that a sufficiently informative input graph is presented. We identify a number of fundamental limitations that preclude feasible inference. We present inference algorithms based on natural approaches that allow to infer schemas that we argue to be of practical importance.
@InProceedings{groz_et_al:LIPIcs.ICDT.2022.14, author = {Groz, Beno\^{i}t and Lemay, Aur\'{e}lien and Staworko, S{\l}awek and Wieczorek, Piotr}, title = {{Inference of Shape Graphs for Graph Databases}}, booktitle = {25th International Conference on Database Theory (ICDT 2022)}, pages = {14:1--14:20}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-223-5}, ISSN = {1868-8969}, year = {2022}, volume = {220}, editor = {Olteanu, Dan and Vortmeier, Nils}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICDT.2022.14}, URN = {urn:nbn:de:0030-drops-158889}, doi = {10.4230/LIPIcs.ICDT.2022.14}, annote = {Keywords: RDF, Schema, Inference, Learning, Fitting, Minimality, Containment} }
Feedback for Dagstuhl Publishing