We examine finite words over an alphabet $Gamma={a,bar{a};b,bar{b}}$ of pairs of letters, where each word $w_1w_2...w_t$ is identical with its {it reverse complement} $bar{w_t}...bar{w_2}bar{w_1}$ (where $bar{bbar{a}}=a,bar{bar{b}}=b$). We seek the smallest $k$ such that every word of length $n,$ composed from $Gamma$, is uniquely determined by the set of its subwords of length up to $k$. Our almost sharp result ($ksim 2n/3$) is an analogue of a classical result for ``normal'' words. This classical problem originally was posed by M.P. Sch"utzenberger and I. Simon, and gained a considerable interest for several researchers, foremost by Vladimir Levenshtein. Our problem has its roots in bioinformatics.
@comment{Dagstuhl Seminar Proceedings paper. Accents in name fields use the brace-first form so that BibTeX treats each accented letter as a single character when sorting and building labels.}
@inproceedings{erdos_et_al:DagSemProc.06201.10,
  author    = {Erd{\"o}s, P{\'e}ter L. and Ligeti, P{\'e}ter and Sziklai, P{\'e}ter and Torney, David C.},
  title     = {Subwords in reverse-complement order},
  booktitle = {Combinatorial and Algorithmic Foundations of Pattern and Association Discovery},
  editor    = {Ahlswede, Rudolf and Apostolico, Alberto and Levenshtein, Vladimir I.},
  series    = {Dagstuhl Seminar Proceedings (DagSemProc)},
  volume    = {6201},
  pages     = {1--8},
  year      = {2006},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  issn      = {1862-4405},
  doi       = {10.4230/DagSemProc.06201.10},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/DagSemProc.06201.10},
  urn       = {urn:nbn:de:0030-drops-7856},
  annote    = {Keywords: Reverse complement order, Reconstruction of words, Microarray experiments},
}
Feedback for Dagstuhl Publishing