/* file: gmse.c M. Costa 3 January 2019
Last revised 8 January 2019 (Benjamin Moody)
-------------------------------------------------------------------------------
gmse: calculates multiscale entropy with different coarse-graining methods (generalized multiscale entropy, GMSE)
Copyright (C) 2019 Madalena Costa
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, see .
You may contact the author by e-mail (mcosta3@bidmc.harvard.edu). For updates
to this software, please visit PhysioNet (https://www.physionet.org/).
_______________________________________________________________________________
*/
#include
#include
#include
#include
#define MAXSTR 1250 /* maximum string size */
#define M_MAX 10 /* maximum value of the parameter m */
#define SCALE_MAX 100 /* maximum scale value */
#define J_MIN 5 /* starting scale */
#define D 50 /* minimum number of matches assumed necessary
for "reliable" computation of SampEn */
/* Global variables */
char *prog, line[MAXSTR];
double *t, *rr, *cg;
int rr_nlin, rr_bufsize, *x, flag_time_rr=0;
/*
The program reads input files comprising either one column of
numbers (flag_time_rr=0) or two columns of numbers, time and RR
intervals (flag_time_rr=1).
The program derives coarse-grained time series (cg) that represent
the system's dynamics on different time scales. Beat number
information is saved in x.
Next, sample entropy is calculated for each cg time series.
The output comprises four columns: scale, sample entropy, number of
matches and r*sd values.
*/
/* Function prototypes */
void alloc_double (double **array, int n);
void alloc_int (int **array, int n);
double StandardDeviation (double *a, int n);
double SampleEntropy (double r, int j, int m, int n);
int CG_mean (int j);
int CG_SD (int j);
int CG_var (int j);
int CG_MAD (int j);
void usage ();
int main(int argc, char *argv[])
{
int i, j, m, i_min, i_max, scale_step, scale_max,
flag_r = 0, flag_x = 0, cg_meth, cg_nlin;
double r, sd;
/* The parameters for the computation of sample entropy are
m (vector length) and r (noise tolerance).
The variable cg_nlin is the length of the coarse-grained time
series (varies with scale).
The variable cg_meth takes the values 1 to 4 depending on the
coarse-graining procedure that is selected (1 mean; 2 sd; 3
variance; 4 mean absolute deviation.) */
prog = argv[0];
scale_max = 20;
scale_step = 1;
m = 2;
i_min = 0;
i_max = INT_MAX;
r = 0.15;
cg_meth = 2;
i = 0;
while (++i < argc && *argv[i] == '-') {
switch (argv[i][1]) {
case 'n':
if (i + 1 >= argc) {
fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
exit(1);
}
scale_max = atoi(argv[++i]);
break;
case 'a':
if (i + 1 >= argc) {
fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
exit(1);
}
scale_step = atoi(argv[++i]);
break;
case 'm':
if (i + 1 >= argc) {
fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
exit(1);
}
m = atoi(argv[++i]);
break;
case 'r':
if (i + 1 >= argc) {
fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
exit(1);
}
flag_r = 1;
r = atof(argv[++i]);
break;
case 'x':
if (i + 1 >= argc) {
fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
exit(1);
}
flag_x=1;
r = atof(argv[++i]);
break;
case 'i':
if (i + 1 >= argc) {
fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
exit(1);
}
i_min = atoi(argv[++i]);
break;
case 'I':
if (i + 1 >= argc) {
fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
exit(1);
}
i_max = atoi(argv[++i]);
break;
case 'c':
if (i + 1 >= argc) {
fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
exit(1);
}
cg_meth = atoi(argv[++i]);
break;
default:
usage();
exit(1);
}
}
if (scale_max <= 0 || scale_max > SCALE_MAX) {
fprintf(stderr, "%s: -n: invalid argument (must be between 1 and %d)\n",
prog, SCALE_MAX);
exit(1);
}
if (scale_step <= 0 || scale_step > scale_max - 1) {
fprintf(stderr, "%s: -a: invalid argument (must be between 1 and %d)\n",
prog, scale_max - 1);
exit(1);
}
if (m <= 0 || m >= M_MAX) {
fprintf(stderr, "%s: -m: invalid argument (must be between 1 and %d)\n",
prog, M_MAX - 1);
exit(1);
}
if (flag_r && flag_x) {
fprintf(stderr, "%s: cannot use both -r and -x\n", prog);
exit(1);
}
if (r <= 0) {
fprintf(stderr, "%s: %s: invalid argument (must be greater than 0)\n",
prog, (flag_r ? "-r" : "-x"));
exit(1);
}
if (cg_meth < 1 || cg_meth > 4) {
fprintf(stderr, "%s: -c: invalid argument (must be between 1 and 4)\n",
prog);
exit(1);
}
/* Memory Allocation */
rr_bufsize = 1024;
rr_nlin = 0;
alloc_double(&t, rr_bufsize);
alloc_double(&rr, rr_bufsize);
alloc_double(&cg, rr_bufsize);
alloc_int(&x, rr_bufsize);
/* Data Read from stdin */
j = -1;
while (fgets(line, MAXSTR, stdin) != NULL){
j++;
if (rr_nlin >= rr_bufsize) {
rr_bufsize *= 2;
alloc_double(&t, rr_bufsize);
alloc_double(&rr, rr_bufsize);
alloc_double(&cg, rr_bufsize);
alloc_int(&x, rr_bufsize);
}
if (j >= i_min && j <= i_max) {
if (sscanf(line, "%lf %lf", &t[j-i_min], &rr[j-i_min]) == 2){
rr_nlin = j - i_min + 1;
flag_time_rr = 1;
}
else if (sscanf(line, "%lf", &rr[j-i_min]) == 1)
rr_nlin = j - i_min + 1;
else {
fprintf(stderr, "The input file should contain two columns: time and rr interval.\n");
exit(1);
}
}
}
/* Deriving the coarse-grained time series & calculating sample entropy */
switch (cg_meth) {
case 1:
for (j = 1; j <= scale_max; j += scale_step) {
cg_nlin = CG_mean (j);
if (flag_x == 0 && j == 1){
sd = StandardDeviation (rr, rr_nlin);
r = r * sd;
}
SampleEntropy (r, j, m, cg_nlin);
}
break;
case 2:
for (j = J_MIN; j <= scale_max; j += scale_step){
cg_nlin = CG_SD (j);
if (flag_x == 0 && j == J_MIN){
sd = StandardDeviation (cg, cg_nlin);
r = r * sd;
}
SampleEntropy (r, j, m, cg_nlin);
}
break;
case 3:
for (j = J_MIN; j <= scale_max; j += scale_step) {
cg_nlin = CG_var (j);
if (flag_x == 0 && j == J_MIN){
sd = StandardDeviation (cg, cg_nlin);
r = r * sd;
}
SampleEntropy (r, j, m, cg_nlin);
}
break;
case 4:
for (j = J_MIN; j <= scale_max; j += scale_step){
cg_nlin = CG_MAD (j);
if (flag_x == 0 && j == J_MIN){
sd = StandardDeviation (cg, cg_nlin);
r = r * sd;
}
SampleEntropy (r, j, m, cg_nlin);
}
break;
default:
usage();
exit(1);
}
/* Free memory */
free(rr);
free(cg);
free(t);
free(x);
return 0;
}
void alloc_double(double **array, int n)
{
double *a;
if ((a = realloc(*array, n * sizeof (double))) == NULL) {
fprintf(stderr, "%s : insufficient memory\n", prog);
exit(2);
}
*array = a;
}
void alloc_int(int **array, int n)
{
int *a;
if ((a = realloc(*array, n * sizeof (int))) == NULL) {
fprintf(stderr, "%s : insufficient memory\n", prog);
exit(2);
}
*array = a;
}
double StandardDeviation(double *a, int count)
{
double sum = 0.0, sum2 = 0.0, sd;
int j;
for (j = 0; j < count; j++){
sum += a[j];
sum2 += a[j] * a[j];
}
sd = sqrt((sum2 - sum * sum / count) / (count - 1));
return (sd);
}
double SampleEntropy(double r, int j, int m, int cg_nlin)
{
int i, k, l, cont[SCALE_MAX][M_MAX+1],
discontinuity_t = 0, discontinuity_m = 0;
double SE[SCALE_MAX];
cg_nlin = cg_nlin - m; /* for 1<=i<=cg_nlin both vectors of length
m_max and m_max+1 are defined */
for (i = 0; i < M_MAX; i++)
cont[j][i] = 0;
for (i = 0; i < cg_nlin; ++i) {
discontinuity_t = 0;
/* checking if the components of the template vector are
consecutive */
for (k = 0; k < m; k++)
if (x[i+k+1] - x[i+k] != j)
discontinuity_t = 1;
if (discontinuity_t == 0 || flag_time_rr == 0) {
for (l = i + 1; l < cg_nlin; ++l) { /* self-matches are not counted */
/* checking if the components of the matching vector are
consecutive */
discontinuity_m = 0;
for (k = 0; k < m; k++)
if (x[l+k+1] - x[l+k] != j)
discontinuity_m = 1;
if (discontinuity_m == 0 || flag_time_rr == 0){
k = 0;
while (k <= m && fabs(cg[i+k] - cg[l+k]) <= r){
k++;
cont[j][k]++;
}
}
}
}
}
if (cont[j][m+1] == 0 || cont[j][m] == 0)
SE[j] = -log((double) 1 / ((cg_nlin) * (cg_nlin - 1)));
else
SE[j] = -log((double) cont[j][m+1] / cont[j][m]);
if (cont[j][m+1] < D) {
fprintf(stderr,
"%s: error: At scale %d, the number of matches (with r*sd=%.3f)\n"
" for %d-component vectors is %d. The time series length\n"
" may be too short, or the r value too restrictive, to\n"
" analyze this scale.\n\n",
prog, j, r, m + 1, cont[j][m+1]);
exit(1);
}
printf ("%d\t%.4lf\t%d/%d\t\t%lf\n", j, SE[j], cont[j][m+1], cont[j][m], r);
return SE[j];
}
int CG_mean(int j)
{
int i = 0, l, n = 0, length, discontinuity;
while (i < rr_nlin - j + 1) {
cg[n] = 0;
discontinuity = 0;
if (flag_time_rr == 0)
length = j;
else{
length = 0;
/* check if rr intervals are consecutive */
for (l = i; l < i + j && discontinuity == 0; l++) {
if (fabs(t[l+1] - t[l] - rr[l]) < 0.1)
length += 1;
else{
discontinuity = 1;
length +=1;
}
}
}
if (discontinuity == 0) {
for (l = 0; l < j; l++)
cg[n] += rr[i+l] / j;
x[n] = i;
n++;
}
i += (length);
}
return n;
}
int CG_SD(int j)
{
double sum, sum2;
int i = 0, l, n = 0, length, discontinuity;
while (i < rr_nlin - j + 1) {
sum = 0;
sum2 = 0;
discontinuity = 0;
if (flag_time_rr == 0)
length = j;
else{
length = 0;
/* check if rr intervals are consecutive */
for (l = i; l < i + j && discontinuity == 0; l++) {
if (fabs(t[l+1] - t[l] - rr[l]) < 0.1)
length += 1;
else{
discontinuity = 1;
length += 1;
}
}
}
if (discontinuity == 0){
for (l = 0; l < j; l++){
sum += rr[i+l];
sum2 += rr[i+l] * rr[i+l];
}
x[n] = i;
if ( (sum2 - sum * sum / j) / (j - 1) > 0)
cg[n] = sqrt((sum2 - sum * sum / j) / (j - 1));
else cg[n]=0;
n++;
}
i += (length);
}
return n;
}
int CG_var(int j)
{
double sum, sum2;
int i = 0, l, n = 0, length, discontinuity;
while (i < rr_nlin - j + 1) {
sum = 0;
sum2 = 0;
discontinuity = 0;
if (flag_time_rr == 0)
length = j;
else{
length = 0;
/* check if rr intervals are consecutive */
for (l = i; l < i + j && discontinuity == 0; l++) {
if (fabs(t[l+1] - t[l] - rr[l]) < 0.1)
length += 1;
else{
discontinuity = 1;
length += 1;
}
}
}
if (discontinuity == 0){
for (l = 0; l < j; l++) {
sum += rr[i+l];
sum2 += rr[i+l] * rr[i+l];
}
x[n] = i;
cg[n] = (sum2 - sum * sum / j) / (j-1);
n++;
}
i += (length);
}
return n;
}
int CG_MAD(int j)
{
double sum, sum2;
int i = 0, l, n = 0, length, discontinuity;
while (i < rr_nlin - j + 1) {
sum = 0;
sum2 = 0;
discontinuity = 0;
if (flag_time_rr == 0)
length = j;
else{
length = 0;
/* check if rr intervals are consecutive */
for (l = i; l < i + j && discontinuity == 0; l++) {
if (fabs(t[l+1] - t[l] - rr[l]) < 0.1)
length += 1;
else{
discontinuity = 1;
length += 1;
}
}
}
if (discontinuity == 0){
for (l = 0; l < j; l++)
sum += rr[i+l] / j;
for (l = 0; l < j; l++)
sum2 += fabs(rr[i+l] - sum);
x[n] = i;
cg[n] = sum2 / j;
n++;
}
i += (length);
}
return n;
}
void usage()
{
fprintf(stderr, "%s [options]\n", prog);
fprintf(stderr, " -n SCALE maximum scale factor [default: 20]\n");
fprintf(stderr, " -a STEP difference between consecutive scale\n");
fprintf(stderr, " factors [default: 1]\n");
fprintf(stderr, " -m COUNT template length (m value) [default: 2]\n");
fprintf(stderr, " -r FACTOR tolerance level in proportion to the\n");
fprintf(stderr, " standard deviation [default: 0.15]\n");
fprintf(stderr, " -x VALUE absolute tolerance level\n");
fprintf(stderr, " -i N skip first N input values\n");
fprintf(stderr, " -I N process at most N input values\n");
fprintf(stderr, " -c [1-4] coarse-graining function:\n");
fprintf(stderr, " -c 1: mean\n");
fprintf(stderr, " -c 2: standard deviation [default]\n");
fprintf(stderr, " -c 3: variance\n");
fprintf(stderr, " -c 4: mean absolute deviation\n");
}