We consider the problem of answering shortest unique substring (SUS) queries on run-length encoded strings. For a string S, a unique substring u = S[i..j] is said to be a shortest unique substring (SUS) of S containing an interval [s, t] (i <= s <= t <= j) if for any i' <= s <= t <= j' with j-i > j'-i', S[i'..j'] occurs at least twice in S. Given a run-length encoding of size m of a string of length N, we show that we can construct a data structure of size O(m+pi_s(N, m)) in O(m log m + pi_c(N, m)) time such that queries can be answered in O(pi_q(N, m) + k) time, where k is the size of the output (the number of SUSs), and pi_s(N,m), pi_c(N,m), pi_q(N,m) are, respectively, the size, construction time, and query time for a predecessor/successor query data structure of m elements for the universe of [1,N]. Using the data structure by Beam and Fich (JCSS 2002), this results in a data structure of O(m) space that is constructed in O(m log m) time, and answers queries in O(sqrt(log m/loglog m)+k) time.
@InProceedings{mieno_et_al:LIPIcs.MFCS.2016.69, author = {Mieno, Takuya and Inenaga, Shunsuke and Bannai, Hideo and Takeda, Masayuki}, title = {{Shortest Unique Substring Queries on Run-Length Encoded Strings}}, booktitle = {41st International Symposium on Mathematical Foundations of Computer Science (MFCS 2016)}, pages = {69:1--69:11}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-016-3}, ISSN = {1868-8969}, year = {2016}, volume = {58}, editor = {Faliszewski, Piotr and Muscholl, Anca and Niedermeier, Rolf}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.MFCS.2016.69}, URN = {urn:nbn:de:0030-drops-65033}, doi = {10.4230/LIPIcs.MFCS.2016.69}, annote = {Keywords: string algorithms, shortest unique substring, run-length encoding} }
Feedback for Dagstuhl Publishing