Merge pull request #246 from MIT-LCP/mit2wav

Lucas-Mc · web-flow · commit 1dd4b5c63dd6 · 2020-07-27T07:05:26.000-04:00
Produces WAV file from WFDB format
diff --git a/wfdb/__init__.py b/wfdb/__init__.py
@@ -1,5 +1,6 @@
 from wfdb.io.record import (Record, MultiRecord, rdheader, rdrecord, rdsamp,
-                            wrsamp, dl_database, edf2mit, mit2edf, wav2mit, wfdb2mat, sampfreq, signame)
+                            wrsamp, dl_database, edf2mit, mit2edf, wav2mit, mit2wav,
+                            wfdb2mat, sampfreq, signame)
 from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
                                 show_ann_classes, ann2rr)
 from wfdb.io.download import get_dbs, get_record_list, dl_files, set_db_index_url
diff --git a/wfdb/io/__init__.py b/wfdb/io/__init__.py
@@ -1,5 +1,6 @@
 from wfdb.io.record import (Record, MultiRecord, rdheader, rdrecord, rdsamp, wrsamp,
-                            dl_database, edf2mit, mit2edf, wav2mit, wfdb2mat, sampfreq, signame, SIGNAL_CLASSES)
+                            dl_database, edf2mit, mit2edf, wav2mit, mit2wav, wfdb2mat,
+                            sampfreq, signame, SIGNAL_CLASSES)
 from wfdb.io._signal import est_res, wr_dat_file
 from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
                                 show_ann_classes, ann2rr)
diff --git a/wfdb/io/record.py b/wfdb/io/record.py
@@ -1891,6 +1891,160 @@ def mit2edf(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None,
             print('WARNING: output contains an invalid character, {}, at byte {}'.format(val, i))
 
 
+def mit2wav(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None,
+            output_filename='', write_header=False):
+    """
+    Convert a WFDB record into .wav format (format 16, multiplexed signals,
+    with embedded header information). Use `wav2mit` to perform the reverse
+    conversion.
+
+    Parameters
+    ----------
+    record_name : str
+        The name of the input WFDB record to be read. Can also work with both
+        EDF and WAV files.
+    pn_dir : str, optional
+        Option used to stream data from Physionet. The Physionet
+        database directory from which to find the required record files.
+        eg. For record '100' in 'http://physionet.org/content/mitdb'
+        pn_dir='mitdb'.
+    sampfrom : int, optional
+        The starting sample number to read for all channels.
+    sampto : int, 'end', optional
+        The sample number at which to stop reading for all channels.
+        Reads the entire duration by default.
+    channels : list, optional
+        List of integer indices specifying the channels to be read.
+        Reads all channels by default.
+    output_filename : str, optional
+        The desired name of the output file, which must end in '.wav'. If
+        left as the default value of '', the output filename is 'REC.wav'.
+    write_header : bool, optional
+        Whether to write (True) or not to write (False) a header file to
+        accompany the generated WAV file. The default value is 'False'.
+
+    Returns
+    -------
+    N/A
+
+    Raises
+    ------
+    ValueError
+        If `output_filename` does not end in '.wav' or `fs` is not an integer.
+
+    Notes
+    -----
+    Files written by `mit2wav` always have exactly three chunks (a header
+    chunk, a format chunk, and a data chunk).  In .wav files, binary data are
+    always written in little-endian format (least significant byte first).
+    The format of `mit2wav`'s output files is as follows:
+
+    [Header chunk]
+    Bytes  0 -  3: "RIFF" [4 ASCII characters]
+    Bytes  4 -  7: L-8 (number of bytes to follow in the file, excluding bytes 0-7)
+    Bytes  8 - 11: "WAVE" [4 ASCII characters]
+
+    [Format chunk]
+    Bytes 12 - 15: "fmt " [4 ASCII characters, note trailing space]
+    Bytes 16 - 19: 16 (format chunk length in bytes, excluding bytes 12-19)
+    Bytes 20 - 35: format specification, consisting of:
+    Bytes 20 - 21: 1 (format tag, indicating no compression is used)
+    Bytes 22 - 23: number of signals (1 - 65535)
+    Bytes 24 - 27: sampling frequency in Hz (per signal)
+                   Note that the sampling frequency in a .wav file must be an
+                   integer multiple of 1 Hz, a restriction that is not imposed
+                   by MIT (WFDB) format.
+    Bytes 28 - 31: bytes per second (sampling frequency * frame size in bytes)
+    Bytes 32 - 33: frame size in bytes
+    Bytes 34 - 35: bits per sample (ADC resolution in bits)
+                   Note that the actual ADC resolution (e.g., 12) is written in
+                   this field, although each output sample is right-padded to fill
+                   a full (16-bit) word. (.wav format allows for 8, 16, 24, and
+                   32 bits per sample)
+
+    [Data chunk]
+    Bytes 36 - 39: "data" [4 ASCII characters]
+    Bytes 40 - 43: L-44 (number of bytes to follow in the data chunk)
+    Bytes 44 - L-1: sample data, consisting of:
+    Bytes 44 - 45: sample 0, channel 0
+    Bytes 46 - 47: sample 0, channel 1
+    ... etc. (same order as in a multiplexed WFDB signal file)
+
+    Examples
+    --------
+    >>> wfdb.mit2wav('100', pn_dir='pwave')
+
+    The output file name is '100.wav'
+
+    """
+    record = rdrecord(record_name, pn_dir=pn_dir, sampfrom=sampfrom,
+                      sampto=sampto, channels=channels, smooth_frames=False)
+    record_name_out = record_name.split(os.sep)[-1].replace('-', '_')
+
+    # Get information needed for the header and format chunks
+    num_samps = record.sig_len
+    if record.fs != int(record.fs):
+        raise ValueError('Sampling frequency must be a whole number of Hz')
+    samps_per_second = int(record.fs)
+    frame_length = record.n_sig * 2
+    chunk_bytes = num_samps * frame_length
+    file_bytes = chunk_bytes + 36
+    bits_per_sample = max(record.adc_res)
+    offset = record.adc_zero
+    shift = [(16 - v) for v in record.adc_res]
+
+    # Use the default output name unless a valid one was supplied
+    if output_filename == '':
+        output_filename = record_name_out + '.wav'
+    elif not output_filename.endswith('.wav'):
+        raise ValueError("Name of output file must end in '.wav'")
+
+    with open(output_filename, 'wb') as f:
+        # Write the WAV file identifier
+        f.write(struct.pack('>4s', b'RIFF'))
+        # Bytes to follow in the file: sample bytes plus 36 bytes of embedded header
+        f.write(struct.pack('<I', file_bytes))
+        # Descriptor for the format of the file
+        f.write(struct.pack('>8s', b'WAVEfmt '))
+        # Number of bytes to follow in the format chunk
+        f.write(struct.pack('<I', 16))
+        # The format tag
+        f.write(struct.pack('<H', 1))
+        # The number of signals
+        f.write(struct.pack('<H', record.n_sig))
+        # The samples per second
+        f.write(struct.pack('<I', samps_per_second))
+        # The number of bytes per second
+        f.write(struct.pack('<I', samps_per_second * frame_length))
+        # The length of each frame
+        f.write(struct.pack('<H', frame_length))
+        # The number of bits per samples
+        f.write(struct.pack('<H', bits_per_sample))
+        # The descriptor to indicate that the data information is next
+        f.write(struct.pack('>4s', b'data'))
+        # The number of bytes in the signal data chunk
+        f.write(struct.pack('<I', chunk_bytes))
+        # Subtract ADC zero, left-justify in 16 bits, and write little-endian samples
+        sig_data = np.left_shift(np.subtract(record.adc(), offset), shift)
+        sig_data = sig_data.reshape((1, -1)).astype('<i2')
+        sig_data.tofile(f)
+
+    # If asked to write the accompanying header file
+    if write_header:
+        record.adc_zero = record.n_sig * [0]
+        record.adc_res = record.n_sig * [16]
+        record.adc_gain = [g * (1 << s) for g, s in zip(record.adc_gain, shift)]
+        record.baseline = [(b - z) * (1 << s) for b, z, s in zip(record.baseline, offset, shift)]
+        # NOTE(review): file_name ignores a custom output_filename -- confirm intended
+        record.file_name = record.n_sig * [record_name_out + '.wav']
+        record.block_size = record.n_sig * [0]
+        record.fmt = record.n_sig * ['16']
+        record.samps_per_frame = record.n_sig * [1]
+        record.init_value = sig_data[0][:record.n_sig].tolist()
+        record.byte_offset = record.n_sig * [44]
+        record.wrheader()
+
+
 def wav2mit(record_name, pn_dir=None, delete_file=True, record_only=False):
     """
     Convert .wav (format 16, multiplexed signals, with embedded header