/* file: wfdbmerge.c G. Moody and T. Kyaw 8 October 2004 Last revised: 17 November 2004 ------------------------------------------------------------------------------- wfdbmerge: Merge (possibly disjoint) WFDB records Copyright (C) 2004 George B. Moody This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. You may contact the author by e-mail (george@mit.edu) or postal mail (MIT Room E25-505A, Cambridge, MA 02139 USA). For updates to this software, please visit PhysioNet (http://www.physionet.org/). _______________________________________________________________________________ Use this program to create a single record from a set of records (which should be named in order of assembly on the command line). For convenience, the records can be specified using either record names or the names of the associated header files; thus to assemble all of the records in directory "foo" into a single record, it is sufficient to use the command: wfdbmerge foo/*.hea It is very important that each record's header file contain a starting time and date as well as the length of the record in samples. This information should be in the first line of each header file; for details, see http://physionet.org/physiotools/wag/header-5.htm wfdbmerge uses the input records' starting times and lengths to determine if there are any gaps between records that need to be filled in with padding (a special value that indicates a missing sample). wfdbmerge first checks all of the header files to get a complete list of signals, and it arranges the signals in the output record in the order in which they appear in the SIGTYPES list of signal types (see below for the definition of SIGTYPES). This list is a text file that can be kept anywhere in the WFDB path (for example, in the current directory, or in /usr/database). While creating the unified signal list, wfdbmerge also keeps track of the largest and smallest gain used to record each signal. The output signals are written at a constant gain for each signal, which is normally the highest gain used for that signal in any of the input records (provided that the result can be represented in 16 bits). If the result of the gain adjustment would require use of more bits than are permitted by the format of the input records, the format is changed so that precision can be preserved. */ #include #include #define SIGTYPES "signal-types" struct psig_t { char *name; int found; char *units; WFDB_Gain gain; int fmt; int spf; int adcres; int adczero; int baseline; }; struct segd_t { char basetime[32]; long nsamp; }; void memerr() { fprintf(stderr, "insufficient memory -- exiting\n"); exit(3); } char *copystr(char *oldstring) { char *newstring; if (oldstring == NULL || *oldstring == '\0') { if ((newstring = calloc(1, 1)) == NULL) memerr(); } else { if ((newstring = calloc(1, strlen(oldstring) + 1)) == NULL) memerr(); strcpy(newstring, oldstring); } return (newstring); } int main(int argc, char **argv) { static char buf[256], *p, *q, *sigtypes, *recordname, *filename; double *dv, *g, vf; FILE *sfile; int i, nsig, nosig, nseg = argc - 1, s, *smap, ss, t, tsig = 0; struct psig_t *psig = NULL; struct segd_t *seg; unsigned long seglen; WFDB_Sample *vin, *vout; WFDB_Siginfo *si, *so; WFDB_Time tt; if (argc < 2) { fprintf(stderr, "usage: %s RECORD [ RECORD ... ]\n", argv[0]); exit(1); } /* Initialize psig (list of all known signal types). */ if ((sigtypes = wfdbfile(SIGTYPES, NULL)) == NULL) { fprintf(stderr, "%s: can't find %s\n", argv[0], SIGTYPES); exit(1); } if ((sfile = fopen(sigtypes, "r")) == NULL) { fprintf(stderr, "%s: can't open %s\n", argv[0], SIGTYPES); exit(1); } while (fgets(buf, sizeof(buf), sfile)) { if (tsig++ == 0) { if ((psig = calloc(1, sizeof(struct psig_t))) == NULL || (g = calloc(1, sizeof(double))) == NULL) memerr(); } else { if ((psig = realloc(psig, sizeof(struct psig_t) * tsig)) == NULL || (g = realloc(g, sizeof(double) * tsig)) == NULL) memerr(); } psig[tsig - 1].name = copystr(buf); psig[tsig - 1].found = 0; } /* This application was written for the MIMIC 2 database, in which the record segments have names of the form xxxxx_000000, xxxxx_000001, etc. If the first record in the record list has a name of this form, the output record is named xxxxx (i.e., the underscore and what follows it are stripped from the first record's name). If the first record's name does not contain an underscore, the output record is named "out". The input record names may also contain path information if the input records are not in the current directory; we begin by removing any such information, up to and including the last '/' in the record name. */ p = argv[1]; q = p + strlen(p); while ((p < --q) && (q[0] != '/')) ; if ((*q) == '/') q++; if (strtok(recordname = copystr(q), "_")) { filename = calloc(sizeof(char), strlen(recordname) + 5); sprintf(filename, "%s.dat", recordname); } else { recordname = "out"; filename = "out.dat"; } wfdbquiet(); if ((seg = calloc(sizeof(struct segd_t), nseg)) == NULL) memerr(); /* Pass 1: collect a list of all of the available signals. */ for (i = 1; i < argc; i++) { /* Strip off '.hea' if present in record name arguments. */ if (p = strstr(argv[i], ".hea")) *p = '\0'; /* Determine the number of signals in the record. */ if ((nsig = isigopen(argv[i], NULL, 0)) <= 0) exit(2); /* Allocate storage for the signal information structures. */ if ((si = malloc(nsig * sizeof(WFDB_Siginfo))) == NULL) { fprintf(stderr, "%s: insufficient memory\n", argv[0]); exit(3); } /* Get the signal information. */ if (isigopen(argv[i], si, -nsig) != nsig) exit(2); strcpy(seg[i - 1].basetime, mstimstr((WFDB_Time) 0)); seg[i - 1].nsamp = si[0].nsamp; for (s = 0; s < nsig; s++) { /* for each input signal */ /* First, make sure each input has a unique name (if necessary, append a '+' to the name to make it unique). */ for (ss = 0; ss < s; ss++) { if (strcmp(si[s].desc, si[ss].desc) == 0) { if ((p = calloc(1, strlen(si[s].desc) + 2)) == NULL) memerr(); sprintf(p, "%s+", si[s].desc); si[s].desc = p; } } /* Next, check each unique name against the list of all signal names, and flag the matching name if it is found. */ for (t = 0; t < tsig; t++) if (strcmp(si[s].desc, psig[t].name) == 0) break; /* If the signal name was not in the list, add it to the end of the list. */ if (t >= tsig) { psig = realloc(psig, sizeof(struct psig_t) * ++tsig); g = realloc(g, sizeof(double) * tsig); if (psig == NULL || g == NULL) memerr(); psig[t].name = copystr(si[s].desc); } if (psig[t].found != 1) { psig[t].found = 1; psig[t].units = copystr(si[s].units); psig[t].gain = g[t] = si[s].gain; psig[t].fmt = si[s].fmt; psig[t].spf = si[s].spf; psig[t].adcres = si[s].adcres; psig[t].adczero = si[s].adczero; psig[t].baseline = si[s].baseline; } else { if (strcmp(psig[t].units, si[s].units)) fprintf(stderr, "warning: units of %s change\n", si[s].desc); if (psig[t].gain != si[s].gain) { if (si[s].gain > psig[t].gain) { psig[t].gain = si[s].gain; psig[t].baseline = si[s].baseline; } if (si[s].gain < g[t]) g[t] = si[s].gain; fprintf(stderr, "warning: gain of %s changes\n", si[s].desc); } if (psig[t].fmt != si[s].fmt) fprintf(stderr, "warning: format of %s changes\n", si[s].desc); if (psig[t].spf != si[s].spf) fprintf(stderr, "warning: spf of %s changes\n", si[s].desc); if (psig[t].adcres != si[s].adcres) fprintf(stderr, "warning: adcres of %s changes\n", si[s].desc); if (psig[t].adczero != si[s].adczero) fprintf(stderr, "warning: adczero of %s changes\n", si[s].desc); if (psig[t].baseline != si[s].baseline) fprintf(stderr, "warning: baseline of %s changes\n", si[s].desc); } } } for (s = t = 0; s < tsig; s++) if (psig[s].found) t++; if ((so = calloc(sizeof(WFDB_Siginfo), t)) == NULL) memerr(); for (s = t = 0; s < tsig; s++) if (psig[s].found) { so[t].fname = filename; so[t].desc = copystr(psig[s].name); so[t].units = copystr(psig[s].units); so[t].gain = psig[s].gain; so[t].fmt = psig[s].fmt; so[t].spf = psig[s].spf; so[t].adcres = psig[s].adcres; if (psig[s].gain != g[s]) { /* there are gain changes */ double dg = psig[s].gain / g[s]; if (dg < 0.0) dg = - dg; while (dg > 1.0) { so[t].adcres++; dg /= 2.0; } if (so[t].adcres > 16) { /* The highest precision supported by the current version of the WFDB library is 16 bits. If we are here, the gain of signal t has varied so much that we cannot represent the smallest quantization steps in 16 bits. */ fprintf(stderr, "warning: there may be loss of precision" " in the %s signal\n", so[t].desc); while (--so[t].adcres > 16) so[t].gain /= 2.0; } } so[t].adczero = psig[s].adczero; so[t++].baseline = psig[s].baseline; } nosig = t; /* Change the output format if necessary. This is done to preserve precision where possible if there have been gain changes in the signals. The current implementation of the WFDB library requires all signals in any signal group to be written in the same format, so the signal that requires the highest precision determines the format used for all of the others as well. */ for (i = t = 0; t < nosig; t++) if (i < so[t].adcres) i = so[t].adcres; if (i > 12) so[0].fmt = 16; /* 16-bit samples */ else if (i > 8) so[0].fmt = 212; /* bit-packed 12-bit samples */ for (t = 1; t < nosig; t++) so[t].fmt = so[0].fmt; if (osigfopen(so, nosig) != nosig) exit(4); if ((smap = calloc(sizeof(int), nosig)) == NULL || (vin = calloc(sizeof(WFDB_Sample), nosig)) == NULL || (vout = calloc(sizeof(WFDB_Sample), nosig)) == NULL || (dv = calloc(sizeof(double), nosig)) == NULL) memerr(); for (i = 1; i < argc; i++) { if ((nsig = isigopen(argv[i], si, nosig)) < 1) break; /* this shouldn't happen! */ for (s = 0; s < nsig; s++) { /* for each input signal */ /* First, make sure each input has a unique name (if necessary, append a '+' to the name to make it unique). */ for (ss = 0; ss < s; ss++) { if (strcmp(si[s].desc, si[ss].desc) == 0) { if ((p = calloc(1, strlen(si[s].desc) + 2)) == NULL) memerr(); sprintf(p, "%s+", si[s].desc); si[s].desc = p; } } } for (s = 0; s < nosig; s++) { /* for each output signal */ vout[s] = (1 << (so[s].adcres-1)); smap[s] = -1; for (t = 0; t < nsig; t++) if (strcmp(so[s].desc, si[t].desc) == 0) { smap[s] = t; break; } } if (i == 1) tt = (WFDB_Time) 0; else { WFDB_Time ttt = strtim(seg[i - 1].basetime); while (tt < ttt) { /* gap between segments */ putvec(vout); tt++; } } seglen = seg[i - 1].nsamp; /* Calculate gain and baseline corrections as needed. */ for (t = 0; t < nosig; t++) { if ((s = smap[t]) > -1) { g[t] = so[t].gain / si[s].gain; dv[t] = so[t].baseline - g[t] * si[s].baseline; } } while (seglen > 0) { getvec(vin); for (t = 0; t < nosig; t++) { s = smap[t]; if (s > -1) { if (g[t] != 1.0 || dv[t] != 0.0) { if ((vf = vin[s]*g[t] + dv[t]) >= 0.0) vout[t] = (WFDB_Sample)(vf + 0.5); else vout[t] = (WFDB_Sample)(vf - 0.5); } else vout[t] = vin[s]; } } putvec(vout); seglen--; } } newheader(recordname); }