Finished reading all of the database's higher-level parameters. Next step: figure out how to read the actual audio content.

6 years ago · d790725000
2 changed files with 47 additions and 15 deletions
--- a/db_reader.py
+++ b/db_reader.py
@ -1,8 +1,10 @@
 import argparse
 from dataclasses import dataclass, field
+from io import SEEK_SET
 from pathlib import Path
 from struct import unpack_from, unpack
-from typing import List, Tuple, BinaryIO
+from typing import List, Tuple, BinaryIO, Dict
+import numpy as np

 MBROLA_VOICES_FOLDER = Path("/usr/share/mbrola/")
 argparser = argparse.ArgumentParser()
@ -10,6 +12,7 @@ argparser.add_argument("mbrola_db", type=Path,
                       help="Mrbola db name or direct path")

 PhonemeCode = int
+Diphone = Tuple[str,str]


 def read_str(io: BinaryIO):
@ -26,14 +29,22 @@ def read_str(io: BinaryIO):

@dataclass
 class DiphoneInfo:
-    left: PhonemeCode
-    right: PhonemeCode
+    left: str
+    right: str

    pos_wave: int  # position in SPEECH_FILE
    halfseg: int  # position of center of diphone
    pos_pm: int  # index in PITCHMARK_FILE
    nb_frame: int  # Number of pitch markers

+    @property
+    def left_code(self):
+        pass
+
+    @property
+    def right_code(self):
+        pass
+

@dataclass
 class FrameType:
@ -56,10 +67,11 @@ class MbrolaDatabase:
    max_samples: int = 0  # Size of the diphone buffer= 0 means let me manage it myself
    magic_header: str = "MBROLA"  # Magic header of the database
    version: str = "2.06"  # version of the database
-    info: List[str] = ""
+    info: List[str] = None
    silence_phone: str = "_"  # silence symbol in the database

-    pitch_marks: List[FrameType] = field(default=list)
+    diphone_table: Dict[Diphone, DiphoneInfo] = field(default_factory=dict)
+    pitch_marks: np.ndarray = None

    def read_header(self, db_file: BinaryIO):
        """Reads the database header"""
@ -78,16 +90,12 @@ class MbrolaDatabase:

    def read_index(self, db_file: BinaryIO):
        """Reads the index table of diphones"""""
-        pass
-
-    def read_pitchmarks(self, db_file: BinaryIO):
-
        i = 0
        pm_index = 0  #  cumulative position in pitch mark vector
        wav_index = 0  #   cumulative position in the waveform database
-        while (pm_index != self.size_mark and i < self.nb_diphone):
-            left = read_str(db_file)
-            right = read_str(db_file)
+        while pm_index != self.size_mark and i < self.nb_diphone:
+            left_phone = read_str(db_file)
+            right_phone = read_str(db_file)
            half_segment = unpack_from("<h", db_file.read(2))[0]
            nb_frames = unpack_from("<B", db_file.read(1))[0]
            nb_wframe = unpack_from("<B", db_file.read(1))[0]
@ -96,12 +104,35 @@ class MbrolaDatabase:
            pm_index += nb_frames

            if pm_index == self.size_mark:
-                self.silence_phone = left
-
+                self.silence_phone = left_phone
+
+            new_wav_index = wav_index
+            wav_index = nb_wframe * self.mbr_period
+            new_diph = DiphoneInfo(left=left_phone, right=right_phone,
+                                   pos_wave=new_wav_index,
+                                   halfseg=half_segment,
+                                   pos_pm=position_pm,
+                                   nb_frame=nb_frames)
+            self.diphone_table[(left_phone, right_phone)] = new_diph
+
+            # keep track of the phoneme with the biggest number of frames
+            if self.max_frame < nb_wframe:
+                self.max_frame = nb_wframe
            i += 1

+    def read_pitchmarks(self, db_file: BinaryIO):
+        round_size = (self.size_mark + 3) // 4
+        self.pitch_marks = np.array(f"<{round_size}B", db_file.read(round_size))
+        self.raw_offset = db_file.tell()
+
    def read_info(self, db_file: BinaryIO):
-        pass
+        db_file.seek(self.raw_offset + self.size_raw, whence=SEEK_SET)
+        self.info = []
+        while True:
+            try:
+                self.info.append(read_str(db_file))
+            except EOFError:
+                break

    def read_database(self):
        with open(self.db_path, "rb") as db_file:
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1 @@
+numpy