Creative Commons Attribution 4.0 International license
The Dagstuhl Seminar 'Similarity-based Clustering and its Application to Medicine and Biology' (07131), held on March 25--30, 2007, provided an excellent atmosphere for in-depth discussions about the research frontier of computational methods for relevant applications of biomedical clustering and beyond. We address some highlighted issues about correlation-based data analysis in this seminar postribution. First, some prominent correlation measures are briefly revisited. Then, a focus is put on Pearson correlation, because of its widespread use in biomedical sciences and because of its analytic accessibility. A connection to Euclidean distance of z-score transformed data is outlined. Cost function optimization of correlation-based data representation is discussed, for which, finally, applications to visualization and clustering of gene expression data are given.
% Strickert and Seiffert, contribution to Dagstuhl Seminar Proceedings 07131 (2007).
% The title is single-braced so the bibliography style controls its casing;
% names are given in "Last, First" form so they parse unambiguously.
@inproceedings{strickert_et_al:DagSemProc.07131.4,
  author    = {Strickert, Marc and Seiffert, Udo},
  title     = {Correlation-based Data Representation},
  editor    = {Biehl, Michael and Hammer, Barbara and Verleysen, Michel and Villmann, Thomas},
  booktitle = {Similarity-based Clustering and its Application to Medicine and Biology},
  series    = {Dagstuhl Seminar Proceedings (DagSemProc)},
  volume    = {7131},
  pages     = {1--16},
  year      = {2007},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  issn      = {1862-4405},
  doi       = {10.4230/DagSemProc.07131.4},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/DagSemProc.07131.4},
  urn       = {urn:nbn:de:0030-drops-11347},
  annote    = {Keywords: Correlation, data representation, gradient-based optimization, clustering, neural gas},
}