/* file: pbindex.c G. Moody 5 March 2008 Last revised: 13 March 2012 ------------------------------------------------------------------------------- pbindex.c: create index entries for records Copyright (C) 2008-2012 George B. Moody This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. You may contact the author by e-mail (george@mit.edu) or postal mail (MIT Room E25-505A, Cambridge, MA 02139 USA). For updates to this software, please visit PhysioNet (http://www.physionet.org/). _______________________________________________________________________________ This program reads a list of PhysioBank records from its standard input and writes an index of their contents on its standard output. The (text) output of this program consists of one line per signal and annotation file, containing these tab-separated fields: record name class signal or annotator name sampling frequency (Hz) gain (A/D units per physical unit), or number of annotations duration (in seconds) time intervals during which samples or annotations are present (in seconds) * If the gain is not recorded in the header, it is reported as 200. * If the type of physical unit is not recorded, it is reported as 'mV'. In variable-format multisegment records, the recorded gain may not be constant; in these records, the gain recorded in the layout header is reported. * Signal loss is reported only for variable-format multisegment records. In all other cases, the reported time interval is the duration of the record. * In the most common case, the signal is present throughout the record, and the final field (time intervals) is omitted. The gain field contains an embedded space between the numerical value and the units, and the time intervals field contains embedded spaces separating disjoint intervals. Note that this program does not process remote variable-layout records, or remote records in which the header does not specify the record length (because it uses ordinary file I/O to read the segment headers in the first case, or to determine the length of the record from seeking in the signal file in the second case). */ #include #include #include #include #include #define AFTYPES "/home/physionet/html/physiobank/database/aftypes" #define SIGTYPES "/home/physionet/html/physiobank/database/sigtypes" char **aname = NULL, **annclass, *pname, **signame = NULL, **sigclass; int annclasses = 0, nclasses = 0; WFDB_Anninfo ai; WFDB_Annotation annot; WFDB_Frequency sfreq; WFDB_Time t0, tf; struct anstats { char *name; int n, anntyp; WFDB_Time t0, tf; struct anstats *next; } *as, *asp, *as0, *lastr; void init(void); char *signaltype(char *name, char *units); char *annotatortype(char *name); int process(char *record); int processms(char *record, char *sdesc, WFDB_Time tf); void process_info(char *record); char *token(char *p); char *ltimstr(WFDB_Time t); char *prog_name(char *s); void cleanup(void); main(int argc, char **argv) { static char record[1024]; pname = prog_name(argv[0]); init(); while (fgets(record, sizeof(record), stdin)) { record[strlen(record) - 1] = '\0'; (void)process(record); } cleanup(); exit(0); } void init() { char buf[256], *p, *prog_name(); FILE *ifile; int i = 0; if ((ifile = fopen(SIGTYPES, "r")) == NULL) { fprintf(stderr, "%s: can't open %s\n", pname, SIGTYPES); exit(1); } while (fgets(buf, sizeof(buf), ifile)) nclasses++; rewind(ifile); signame = calloc(nclasses, sizeof(char *)); sigclass = calloc(nclasses, sizeof(char *)); if (sigclass == NULL || signame == NULL) { fclose(ifile); fprintf(stderr, "%s: insufficient memory\n", pname); exit(2); } while (fgets(buf, sizeof(buf), ifile)) { p = strstr(buf, "\t"); if (p == NULL) continue; *p++ = '\0'; sigclass[i] = calloc(strlen(buf)+1, sizeof(char)); strcpy(sigclass[i], buf); signame[i] = calloc(strlen(p), sizeof(char)); p[strlen(p)-1] = '\0'; strcpy(signame[i++], p); } fclose(ifile); if ((ifile = fopen(AFTYPES, "r")) == NULL) { fprintf(stderr, "%s: can't open aftypes\n", pname); exit(1); } while (fgets(buf, sizeof(buf), ifile)) annclasses++; rewind(ifile); aname = calloc(annclasses, sizeof(char *)); annclass = calloc(annclasses, sizeof(char *)); if (annclass == NULL || aname == NULL) { fclose(ifile); fprintf(stderr, "%s: insufficient memory\n", pname); exit(2); } i = 0; while (fgets(buf, sizeof(buf), ifile)) { p = strstr(buf, "\t"); if (p == NULL) continue; *p++ = '\0'; annclass[i] = calloc(strlen(buf)+1, sizeof(char)); strcpy(annclass[i], buf); aname[i] = calloc(strlen(p), sizeof(char)); p[strlen(p)-1] = '\0'; strcpy(aname[i++], p); } fclose(ifile); ai.stat = WFDB_READ; } char *signaltype(char *name, char *units) { int i = 0; for (i = 0; i < nclasses; i++) { if (strcmp(name, signame[i]) == 0) return(sigclass[i]); } if (strcmp(units, "mV") == 0) return("ECG"); else if (strcmp(units, "mmHg") == 0) return("BP"); else return("unknown"); } char *annotatortype(char *name) { int i = 0; for (i = 0; i < annclasses; i++) { if (strcmp(name, aname[i]) == 0) return(annclass[i]); } return("AnnU"); } int process(char *record) { char *p, *pname; int anum = 0, i, n, nsig = 0, vlmsrec = 0; int nbp = 0, nco = 0, nco2 = 0, necg = 0, neeg = 0, nemg = 0, neog = 0, nep = 0, nflow = 0, nhr = 0, nnoise = 0, no2 = 0, npleth = 0, npos = 0, nresp = 0, nsound = 0, nst = 0, nstatus = 0, nstim = 0, nsv = 0, ntemp = 0, nscg=0; FILE *ifile; WFDB_Siginfo *s; WFDB_Time t; anum = nsig = nbp = nco = nco2 = necg = neeg = nemg = neog = nep = nflow = nhr = nnoise = no2 = npleth = npos = nresp = nsound = nst = nstatus = nstim = nsv = ntemp = 0; /* Discover the number of signals defined in the header. */ wfdbquiet(); if ((nsig = isigopen(record, NULL, 0)) < 0) { wfdbquit(); return (1); } wfdbverbose(); /* Index metadata from the header. */ process_info(record); if (nsig == 0) /* no signals -- sfreq needed for annotations */ sfreq = sampfreq(record); else { /* Allocate storage for nsig signal information structures. */ if ((s = malloc(nsig * sizeof(WFDB_Siginfo))) == NULL) { fprintf(stderr, "%s: insufficient memory\n", pname); return (2); } if ((nsig = isigopen(record, s, nsig)) <= 0) return (0); setgvmode(WFDB_LOWRES); sfreq = sampfreq(NULL); t = strtim("e"); if (*(s[0].fname) != '~') p = wfdbfile(s[0].fname, NULL); else p = NULL; if (s[0].nsamp != t && s[0].nsamp == 0) vlmsrec = 1; /* it's a variable-layout multisegment record */ else if (t == 0 && p && /* length unspecified in header file */ (ifile = fopen(p, "r")) && (fseek(ifile, 0L, 2) == 0)) { int framesize = 0; long nbytes = ftell(ifile) - wfdbgetstart(0); /* # data bytes */ fclose(ifile); for (i = 0; i < nsig && s[i].group == 0; i++) framesize += s[i].spf; /* frame size in samples */ switch (s[0].fmt) { case 8: case 80: t = nbytes / framesize; break; default: case 16: case 61: case 160: t = nbytes / (2*framesize); break; case 212: t = (2L * nbytes) / (3*framesize); break; case 310: case 311: t = (3L * nbytes) / (4*framesize); break; } } for (i = 0; i < nsig; i++) { (void)printf("%s\t", record); if (s[i].units == NULL) s[i].units = "mV"; p = signaltype(s[i].desc, s[i].units); if (strcmp(p, "BP") == 0) (void)printf("BP%d\t", ++nbp); else if (strcmp(p, "CO") == 0) (void)printf("CO-%d\t", ++nco); else if (strcmp(p, "CO2") == 0) (void)printf("CO2-%d\t", ++nco2); else if (strcmp(p, "ECG") == 0) (void)printf("ECG%d\t", ++necg); else if (strcmp(p, "EEG") == 0) (void)printf("EEG%d\t", ++neeg); else if (strcmp(p, "EMG") == 0) (void)printf("EMG%d\t", ++nemg); else if (strcmp(p, "EOG") == 0) (void)printf("EOG%d\t", ++neog); else if (strcmp(p, "EP") == 0) (void)printf("EP%d\t", ++nep); else if (strcmp(p, "Flow") == 0) (void)printf("Flow%d\t", ++nflow); else if (strcmp(p, "HR") == 0) (void)printf("HR%d\t", ++nhr); else if (strcmp(p, "Noise") == 0) (void)printf("Noise%d\t", ++nnoise); else if (strcmp(p, "O2") == 0) (void)printf("O2-%d\t", ++no2); else if (strcmp(p, "PLETH") == 0) (void)printf("PLETH%d\t", ++npleth); else if (strcmp(p, "Pos") == 0) (void)printf("Pos%d\t", ++npos); else if (strcmp(p, "Resp") == 0) (void)printf("Resp%d\t", ++nresp); else if (strcmp(p, "SCG") == 0) (void)printf("SCG%d\t", ++nscg); else if (strcmp(p, "Sound") == 0) (void)printf("Sound%d\t", ++nsound); else if (strcmp(p, "ST") == 0) (void)printf("ST%d\t", ++nst); else if (strcmp(p, "Status") == 0) (void)printf("Status%d\t", ++nstatus); else if (strcmp(p, "Stim") == 0) (void)printf("Stim%d\t", ++nstim); else if (strcmp(p, "SV") == 0) (void)printf("SV%d\t", ++nsv); else if (strcmp(p, "Temp") == 0) (void)printf("Temp%d\t", ++ntemp); else (void)printf("%s\t", p); (void)printf("%s\t", s[i].desc); printf("%g\t", sfreq * s[i].spf); if (s[i].gain) printf("%g adu/%s\t", s[i].gain, s[i].units?s[i].units:"mV"); else printf("no calibration\t"); if (vlmsrec) { char sdesc[256]; sprintf(sdesc, " %s\r\n", s[i].desc); processms(record, sdesc, t); } else (void)printf("%s\n", ltimstr(t)); } } wfdbquiet(); /* suppress WFDB library error messages */ for (i = 0; i < annclasses; i++) { ai.name = aname[i]; if (annopen(record, &ai, 1) < 0) continue; /* file doesn't exist, move on */ if (getann(0, &annot) < 0) continue; /* file is empty, move on */ t0 = tf = annot.time; n = 1; asp = lastr = NULL; as0 = as = calloc(sizeof(struct anstats), 1); if (annot.anntyp == RHYTHM) { p = annot.aux+1; lastr = as; as->tf = 0; } else { p = annstr(annot.anntyp); as->tf = annot.time; } as->name = calloc(strlen(p) + 1, 1); strcpy(as->name, p); as->n = 1; as->anntyp = annot.anntyp; as->t0 = annot.time; while (getann(0, &annot) >= 0) { n++; tf = annot.time; if (annot.anntyp == RHYTHM) p = annot.aux+1; else (p = annstr(annot.anntyp)); as = as0; while (as) { if (strcmp(as->name, p) == 0) { as->n++; if (annot.anntyp == RHYTHM) { as->t0 = annot.time; if (lastr) lastr->tf += annot.time - lastr->t0; else as->tf = annot.time; lastr = as; } else as->tf = annot.time; break; } asp = as; as = as->next; } if (as == NULL) { as = calloc(sizeof(struct anstats), 1); as->name = calloc(strlen(p) + 1, 1); strcpy(as->name, p); as->n = 1; as->anntyp = annot.anntyp; as->t0 = annot.time; if (annot.anntyp == RHYTHM) { lastr = as; as->tf = 0; } else as->tf = annot.time; if (asp) asp->next = as; else as0 = as; } } if (lastr) lastr->tf += tf - lastr->t0; printf("%s\t%s%d\t%s\t%g\t%ld\t", record, annclass[i], ++anum, ai.name, sfreq, n); printf("%s\t", ltimstr(tf - t0)); printf("%s-", ltimstr(t0)); printf("%s\n", ltimstr(tf)); while (as = as0) { printf("%s\t%s%d\t%s/%s\t%g\t%ld\t", record, annclass[i], anum, ai.name, as->name, sfreq, as->n); if (as->anntyp == RHYTHM) printf("%s\n", ltimstr(as->tf)); else { printf("%s\t", ltimstr(as->tf - as->t0)); printf("%s-", ltimstr(as->t0)); printf("%s\n", ltimstr(as->tf)); } as0 = as->next; free(as->name); free(as); } as0 = NULL; iannclose(0); } wfdbverbose(); /* reenable error messages */ wfdbquit(); return (0); } int processms(char *record, char *sdesc, WFDB_Time tf) { char buf[256], *d, *p, *q, *tbp, *tbpmax, *hfname, *shfname; FILE *ifile, *sfile; int signalon = 0; static char tbuf[204800]; WFDB_Time t = 0, t0 = 0, tsum = 0; p = wfdbfile("hea", record); hfname = calloc(strlen(p) + 1, 1); strcpy(hfname, p); if ((ifile = fopen(hfname, "r")) == NULL) { fprintf(stderr, "%s: can't open %s\n", pname, hfname); free(hfname); return (-1); } for (d = p + strlen(p); d > p; d--) if (*(d-1) == '/') { *d = '\0'; break; } shfname = calloc(strlen(hfname) + 16, 1); strcpy(shfname, p); d = shfname + strlen(shfname); /* d points to first char after '/' */ tbp = tbuf; *tbp = '\0'; tbpmax = tbuf + sizeof(tbuf) - 50; fgets(buf, sizeof(buf), ifile); /* read and ignore first two lines */ fgets(buf, sizeof(buf), ifile); while (fgets(buf, sizeof(buf), ifile)) { /* read a segment descriptor */ char *tp; if (buf[0] == '~') { /* segment is null (all signals off) */ if (signalon) { if (tbp < tbpmax) { sprintf(tbp, "-%s", ltimstr(t)); tbp += strlen(tbp); if (tbp >= tbpmax) { sprintf(tbp, " ..."); break; } } tsum += t - t0; signalon = 0; } t += atol(buf+2); continue; } for (tp = buf+1; *tp != ' '; tp++) ; *tp = '\0'; sprintf(d, "%s.hea", buf); if (sfile = fopen(shfname, "r")) { /* open the segment header file */ char sbuf[256]; int i; fgets(sbuf, sizeof(sbuf), sfile); /* read and ignore the first line */ i = 1; while ((p = fgets(sbuf, sizeof(sbuf), sfile)) && *sbuf != '#' && (i = strcmp(sbuf + strlen(sbuf) - strlen(sdesc), sdesc))) ; /* stop reading if the specified signal is found */ if (i == 0) { /* signal appears in this segment */ if (signalon == 0) { if (tbp < tbpmax) { sprintf(tbp, " %s", ltimstr(t)); /* signal begins here */ tbp += strlen(tbp); } t0 = t; signalon = 1; } } else { /* signal does not appear in this segment */ if (signalon) { if (tbp < tbpmax) { sprintf(tbp, "-%s", ltimstr(t)); /* signal ends here */ tbp += strlen(tbp); } tsum += t - t0; signalon = 0; if (tbp >= tbpmax) { sprintf(tbp, " ..."); break; } } } t += atol(tp+1); fclose(sfile); } } if (signalon) { sprintf(tbp, "-%s", ltimstr(t)); tsum += t - t0; } printf("%s", ltimstr(tsum)); if (tbuf[0]) printf("\t%s", tbuf+1); printf("\n"); fclose(ifile); free(shfname); free(hfname); } void process_info(char *record) { char *info, *p, *sex = NULL; int ndiag = 0, ninfo = 0, nmeds = 0; double age = -1.0; if (info = getinfo(record)) { /* Find the first non-space in the first info string. */ for (p = info; *p && *p == ' '; p++) ; if ('0' <= *p && *p <= '9') { /* If the first token of the first info string is numeric, and the second token is 'f', 'F', 'm', or 'M', the current .hea file does not have tagged info, and the first and second tokens are the age and sex; and the second info string (if present) contains the medications. Handle this case first. */ sscanf(p, "%lf", &age); p = token(p); /* go to the next token */ if (p && (*p == 'm' || *p == 'M')) sex = "M"; else if (p && (*p == 'f' || *p == 'F')) sex = "F"; if (sex == NULL) { /* it wasn't age and sex after all! */ age = -1.0; p = info; } /* If there are any more tokens, save them as 'Info'. */ if (p = token(p)) printf("%s\tInfo%d\t%s\n", record, ++ninfo, p); if (info = getinfo((char *)NULL)) { printf("%s\tMeds%d\t%s\n", record, ++nmeds, info); info = getinfo((char *)NULL); } } /* process standard (tagged) info */ while (info) { if (age < 0) { if ((p = strstr(info, "age")) || (p = strstr(info, "Age"))) { if (p = token(p)) { sscanf(p, "%lf", &age); } /* Additional tagged data may follow age. Continue processing the remainder of this info string below. */ if (!(info = token(p))) /* If there is nothing else, get the next info if any. */ info = getinfo((char *)NULL); } } if (sex == NULL) { if (info && ((p = strstr(info, "sex")) || (p = strstr(info, "Sex")))) { if ((p = token(p)) && (*p == 'm' || *p == 'M')) sex = "M"; else if (p && (*p == 'f' || *p == 'F')) sex = "F"; /* Additional tagged data may follow sex. Continue processing the remainder of this info string. */ if (!(info = token(p))) /* If there is nothing else, get the next info if any. */ info = getinfo((char *)NULL); } } /* Diagnoses may be present in more than one info string. */ if (info && *info && ((p=strstr(info,"diagnos")) || (p=strstr(info,"Diagnos")))) { if ((p = token(p)) == NULL) /* If nothing follows the 'diagnosis' tag, assume the next info is the diagnosis. */ p = getinfo((char *)NULL); if (p) { printf("%s\tDiag%d\t%s\n", record, ++ndiag, p); /* This info has been consumed; get the next info if any. */ info = getinfo((char *)NULL); continue; } } if (info && *info && ((p=strstr(info,"medication"))||(p=strstr(info,"Medication")))) { if ((p = token(p)) == NULL) /* If nothing follows the 'medication' tag, assume the next info is the medication. */ p = getinfo((char *)NULL); if (p) { printf("%s\tMeds%d\t%s\n", record, ++nmeds, p); /* This info has been consumed; get the next info if any. */ info = getinfo((char *)NULL); continue; } } /* Process any info that was not identified above. */ if (info && *info) printf("%s\tInfo%d\t%s\n", record, ++ninfo, info); info = getinfo((char *)NULL); } if (age > -1.0 && sex == NULL) sex = "?"; if (sex) printf("%s\tAgeSex\t%g\t%s\n", record, age, sex); } } char *token(char *p) { if (p) { while (*p && *p != ' ' && *p != '\t' && *p != '\n') p++; /* find whitespace */ while (*p && (*p == ' ' || *p == '\t' || *p == '\n')) p++; /* find first non-whitespace */ if (*p == '\0') p = NULL; } return (p); } char *ltimstr(WFDB_Time t) { static char p[10]; sprintf(p, "%ld", (long)(t/sfreq + 0.5)); return (p); } char *prog_name(char *s) { char *p = s + strlen(s); #ifdef MSDOS while (p >= s && *p != '\\' && *p != ':') { if (*p == '.') *p = '\0'; /* strip off extension */ if ('A' <= *p && *p <= 'Z') *p += 'a' - 'A'; /* convert to lower case */ p--; } #else while (p >= s && *p != '/') p--; #endif return (p+1); } void cleanup() { if (signame) { while (nclasses > 0) { free(signame[--nclasses]); free(sigclass[nclasses]); } free(sigclass); free(signame); signame = NULL; } }