/* file: wfdbmerge.c		G. Moody and T. Kyaw	8 October 2004
   				Last revised:	       17 November 2004

-------------------------------------------------------------------------------
wfdbmerge: Merge (possibly disjoint) WFDB records
Copyright (C) 2004 George B. Moody

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA.

You may contact the author by e-mail (george@mit.edu) or postal mail
(MIT Room E25-505A, Cambridge, MA 02139 USA).  For updates to this software,
please visit PhysioNet (http://www.physionet.org/).
_______________________________________________________________________________

Use this program to create a single record from a set of records (which should
be named in order of assembly on the command line).  For convenience, the
records can be specified using either record names or the names of the
associated header files;  thus to assemble all of the records in directory
"foo" into a single record, it is sufficient to use the command:

	wfdbmerge foo/*.hea

It is very important that each record's header file contain a starting time
and date as well as the length of the record in samples.  This information
should be in the first line of each header file;  for details, see

	http://physionet.org/physiotools/wag/header-5.htm

wfdbmerge uses the input records' starting times and lengths to determine
if there are any gaps between records that need to be filled in with
padding (a special value that indicates a missing sample).

wfdbmerge first checks all of the header files to get a complete list of
signals, and it arranges the signals in the output record in the order in which
they appear in the SIGTYPES list of signal types (see below for the definition
of SIGTYPES).  This list is a text file that can be kept anywhere in the WFDB
path (for example, in the current directory, or in /usr/database).  While
creating the unified signal list, wfdbmerge also keeps track of the largest
and smallest gain used to record each signal.  The output signals are written
at a constant gain for each signal, which is normally the highest gain used
for that signal in any of the input records (provided that the result can be
represented in 16 bits).  If the result of the gain adjustment would require
use of more bits than are permitted by the format of the input records, the
format is changed so that precision can be preserved.
*/

#include <stdio.h>
#include <wfdb/wfdb.h>
#define SIGTYPES "signal-types"

struct psig_t {
    char *name;
    int found;
    char *units;
    WFDB_Gain gain;
    int fmt;
    int spf;
    int adcres;
    int adczero;
    int baseline;
};

struct segd_t {
    char basetime[32];
    long nsamp;
};

void memerr()
{
    fprintf(stderr, "insufficient memory -- exiting\n");
    exit(3);
}

char *copystr(char *oldstring)
{
    char *newstring;

    if (oldstring == NULL || *oldstring == '\0') {
	if ((newstring = calloc(1, 1)) == NULL)
	    memerr();
    } else {
	if ((newstring = calloc(1, strlen(oldstring) + 1)) == NULL)
	    memerr();
	strcpy(newstring, oldstring);
    }
    return (newstring);
}

int main(int argc, char **argv)
{
    static char buf[256], *p, *q, *sigtypes, *recordname, *filename;
    double *dv, *g, vf;
    FILE *sfile;
    int i, nsig, nosig, nseg = argc - 1, s, *smap, ss, t, tsig = 0;
    struct psig_t *psig = NULL;
    struct segd_t *seg;
    unsigned long seglen;
    WFDB_Sample *vin, *vout;
    WFDB_Siginfo *si, *so;
    WFDB_Time tt;

    if (argc < 2) {
	fprintf(stderr, "usage: %s RECORD [ RECORD ... ]\n", argv[0]);
	exit(1);
    }

    /* Initialize psig (list of all known signal types). */
    if ((sigtypes = wfdbfile(SIGTYPES, NULL)) == NULL) {
	fprintf(stderr, "%s: can't find %s\n", argv[0], SIGTYPES);
	exit(1);
    }
    if ((sfile = fopen(sigtypes, "r")) == NULL) {
	fprintf(stderr, "%s: can't open %s\n", argv[0], SIGTYPES);
	exit(1);
    }
    while (fgets(buf, sizeof(buf), sfile)) {
	if (tsig++ == 0) {
	    if ((psig = calloc(1, sizeof(struct psig_t))) == NULL ||
		(g = calloc(1, sizeof(double))) == NULL)
		memerr();
	} else {
	    if ((psig = realloc(psig, sizeof(struct psig_t) * tsig)) == NULL ||
		(g = realloc(g, sizeof(double) * tsig)) == NULL)
		memerr();
	}
	psig[tsig - 1].name = copystr(buf);
	psig[tsig - 1].found = 0;
    }

    /* This application was written for the MIMIC 2 database, in which the
       record segments have names of the form xxxxx_000000, xxxxx_000001, etc.
       If the first record in the record list has a name of this form, the
       output record is named xxxxx (i.e., the underscore and what follows it
       are stripped from the first record's name).  If the first record's name
       does not contain an underscore, the output record is named "out".

       The input record names may also contain path information if the input
       records are not in the current directory;  we begin by removing any
       such information, up to and including the last '/' in the record name.
    */
    p = argv[1];
    q = p + strlen(p);
    while ((p < --q) && (q[0] != '/'))
	;
    if ((*q) == '/') q++;
    if (strtok(recordname = copystr(q), "_")) {
	filename = calloc(sizeof(char), strlen(recordname) + 5);
	sprintf(filename, "%s.dat", recordname);
    }
    else {
	recordname = "out";
	filename = "out.dat";
    }
    wfdbquiet();
    if ((seg = calloc(sizeof(struct segd_t), nseg)) == NULL)
	memerr();

    /* Pass 1: collect a list of all of the available signals. */
    for (i = 1; i < argc; i++) {
	/* Strip off '.hea' if present in record name arguments. */
	if (p = strstr(argv[i], ".hea"))
	    *p = '\0';
	/* Determine the number of signals in the record. */
	if ((nsig = isigopen(argv[i], NULL, 0)) <= 0)
	    exit(2);
	/* Allocate storage for the signal information structures. */
	if ((si = malloc(nsig * sizeof(WFDB_Siginfo))) == NULL) {
	    fprintf(stderr, "%s: insufficient memory\n", argv[0]);
	    exit(3);
	}
	/* Get the signal information. */
	if (isigopen(argv[i], si, -nsig) != nsig)
	    exit(2);
	strcpy(seg[i - 1].basetime, mstimstr((WFDB_Time) 0));
	seg[i - 1].nsamp = si[0].nsamp;
	for (s = 0; s < nsig; s++) {	/* for each input signal */
	    /* First, make sure each input has a unique name (if necessary,
	       append a '+' to the name to make it unique). */
	    for (ss = 0; ss < s; ss++) {
		if (strcmp(si[s].desc, si[ss].desc) == 0) {
		    if ((p = calloc(1, strlen(si[s].desc) + 2)) == NULL)
			memerr();
		    sprintf(p, "%s+", si[s].desc);
		    si[s].desc = p;
		}
	    }
	    /* Next, check each unique name against the list of all
	       signal names, and flag the matching name if it is found. */
	    for (t = 0; t < tsig; t++)
		if (strcmp(si[s].desc, psig[t].name) == 0)
		    break;
	    /* If the signal name was not in the list, add it to the end of
	       the list. */
	    if (t >= tsig) {
		psig = realloc(psig, sizeof(struct psig_t) * ++tsig);
		g = realloc(g, sizeof(double) * tsig);
		if (psig == NULL || g == NULL)
		    memerr();
		psig[t].name = copystr(si[s].desc);
	    }
	    if (psig[t].found != 1) {
		psig[t].found = 1;
		psig[t].units = copystr(si[s].units);
		psig[t].gain = g[t] = si[s].gain;
		psig[t].fmt = si[s].fmt;
		psig[t].spf = si[s].spf;
		psig[t].adcres = si[s].adcres;
		psig[t].adczero = si[s].adczero;
		psig[t].baseline = si[s].baseline;
	    } else {
		if (strcmp(psig[t].units, si[s].units))
		    fprintf(stderr, "warning: units of %s change\n",
			    si[s].desc);
		if (psig[t].gain != si[s].gain) {
		    if (si[s].gain > psig[t].gain) {
			psig[t].gain = si[s].gain;
			psig[t].baseline = si[s].baseline;
		    }
		    if (si[s].gain < g[t])
			g[t] = si[s].gain;
		    fprintf(stderr, "warning: gain of %s changes\n",
			    si[s].desc);
		}
		if (psig[t].fmt != si[s].fmt)
		    fprintf(stderr, "warning: format of %s changes\n",
			    si[s].desc);
		if (psig[t].spf != si[s].spf)
		    fprintf(stderr, "warning: spf of %s changes\n",
			    si[s].desc);
		if (psig[t].adcres != si[s].adcres)
		    fprintf(stderr, "warning: adcres of %s changes\n",
			    si[s].desc);
		if (psig[t].adczero != si[s].adczero)
		    fprintf(stderr, "warning: adczero of %s changes\n",
			    si[s].desc);
		if (psig[t].baseline != si[s].baseline)
		    fprintf(stderr, "warning: baseline of %s changes\n",
			    si[s].desc);
	    }
	}
    }
    for (s = t = 0; s < tsig; s++)
	if (psig[s].found)
	    t++;
    if ((so = calloc(sizeof(WFDB_Siginfo), t)) == NULL)
	memerr();

    for (s = t = 0; s < tsig; s++)
	if (psig[s].found) {
	    so[t].fname = filename;
	    so[t].desc = copystr(psig[s].name);
	    so[t].units = copystr(psig[s].units);
	    so[t].gain = psig[s].gain;
	    so[t].fmt = psig[s].fmt;
	    so[t].spf = psig[s].spf;
	    so[t].adcres = psig[s].adcres;
	    if (psig[s].gain != g[s]) { /* there are gain changes */
		double dg = psig[s].gain / g[s];

		if (dg < 0.0) dg = - dg;
		while (dg > 1.0) {
		    so[t].adcres++;
		    dg /= 2.0;
		}
		if (so[t].adcres > 16) {
		    /* The highest precision supported by the current version
		       of the WFDB library is 16 bits.  If we are here, the
		       gain of signal t has varied so much that we cannot
		       represent the smallest quantization steps in 16 bits. */
		    fprintf(stderr, "warning: there may be loss of precision"
			    " in the %s signal\n", so[t].desc);
		    while (--so[t].adcres > 16)
			so[t].gain /= 2.0;
		}
	    }
	    so[t].adczero = psig[s].adczero;
	    so[t++].baseline = psig[s].baseline;
	}
    nosig = t;

    /* Change the output format if necessary.  This is done to preserve
       precision where possible if there have been gain changes in the
       signals.  The current implementation of the WFDB library requires
       all signals in any signal group to be written in the same format,
       so the signal that requires the highest precision determines the
       format used for all of the others as well. 
    */
    for (i = t = 0; t < nosig; t++)
	if (i < so[t].adcres) i = so[t].adcres;
    if (i > 12) so[0].fmt = 16;		/* 16-bit samples */
    else if (i > 8) so[0].fmt = 212;	/* bit-packed 12-bit samples */
    for (t = 1; t < nosig; t++)
	so[t].fmt = so[0].fmt;

    if (osigfopen(so, nosig) != nosig)
	exit(4);

    if ((smap = calloc(sizeof(int), nosig)) == NULL ||
	(vin = calloc(sizeof(WFDB_Sample), nosig)) == NULL ||
	(vout = calloc(sizeof(WFDB_Sample), nosig)) == NULL ||
	(dv = calloc(sizeof(double), nosig)) == NULL)
	memerr();

    for (i = 1; i < argc; i++) {
	if ((nsig = isigopen(argv[i], si, nosig)) < 1)
	    break;		/* this shouldn't happen! */
	for (s = 0; s < nsig; s++) {	/* for each input signal */
	    /* First, make sure each input has a unique name (if necessary,
	       append a '+' to the name to make it unique). */
	    for (ss = 0; ss < s; ss++) {
		if (strcmp(si[s].desc, si[ss].desc) == 0) {
		    if ((p = calloc(1, strlen(si[s].desc) + 2)) == NULL)
			memerr();
		    sprintf(p, "%s+", si[s].desc);
		    si[s].desc = p;
		}
	    }
	}
	for (s = 0; s < nosig; s++) {	/* for each output signal */
	    vout[s] = (1 << (so[s].adcres-1));
	    smap[s] = -1;
	    for (t = 0; t < nsig; t++)
		if (strcmp(so[s].desc, si[t].desc) == 0) {
		    smap[s] = t;
		    break;
		}
	}
	if (i == 1)
	    tt = (WFDB_Time) 0;
	else {
	    WFDB_Time ttt = strtim(seg[i - 1].basetime);
	    while (tt < ttt) {	/* gap between segments */
		putvec(vout);
		tt++;
	    }
	}
	seglen = seg[i - 1].nsamp;
	/* Calculate gain and baseline corrections as needed. */
	for (t = 0; t < nosig; t++) {
	    if ((s = smap[t]) > -1) {
		g[t] = so[t].gain / si[s].gain;
		dv[t] = so[t].baseline - g[t] * si[s].baseline;
	    }
	}
	while (seglen > 0) {
	    getvec(vin);
	    for (t = 0; t < nosig; t++) {
		s = smap[t];
		if (s > -1) {
		    if (g[t] != 1.0 || dv[t] != 0.0) {
			if ((vf = vin[s]*g[t] + dv[t]) >= 0.0)
			    vout[t] = (WFDB_Sample)(vf + 0.5);
			else
			    vout[t] = (WFDB_Sample)(vf - 0.5);
		    }
		    else
			vout[t] = vin[s];
		}
	    }
	    putvec(vout);
	    seglen--;
	}
    }

    newheader(recordname);
}