An overview of the concept of program similarity is presented. It divides similarity into two types - syntactic and semantic - and provides a review of eight categories of methods that may be used to measure program similarity. A summary of some applications of these methods is included. The paper is intended to be a starting point for a more comprehensive analysis of the subject of similarity in programs, which is critical to understand if progress is to be made in fields such as clone detection.
@comment{Citation record for the paper "Similarity in Programs", published in the Dagstuhl Seminar Proceedings volume "Duplication, Redundancy, and Similarity in Software". Names use the "Last, First" form; accented letters are written as brace-wrapped LaTeX escapes so that classic BibTeX treats each one as a single letter when sorting and building labels.}
@inproceedings{walenstein_et_al:DagSemProc.06301.11,
  author    = {Walenstein, Andrew and El-Ramly, Mohammad and Cordy, James R. and Evans, William S. and Mahdavi, Kiarash and Pizka, Markus and Ramalingam, Ganesan and von Gudenberg, J{\"u}rgen Wolff},
  title     = {Similarity in Programs},
  editor    = {Koschke, Rainer and Merlo, Ettore and Walenstein, Andrew},
  booktitle = {Duplication, Redundancy, and Similarity in Software},
  series    = {Dagstuhl Seminar Proceedings (DagSemProc)},
  volume    = {6301},
  pages     = {1--8},
  year      = {2007},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  issn      = {1862-4405},
  doi       = {10.4230/DagSemProc.06301.11},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/DagSemProc.06301.11},
  urn       = {urn:nbn:de:0030-drops-9681},
  annote    = {Keywords: Computer programs, similarity, code clone, software comparison, program metrics, Levenshtein distance, parameterized difference, feature space, shared},
}
Feedback for Dagstuhl Publishing