From d790725000ecfe34684b18a8ab3ef91740fc9c67 Mon Sep 17 00:00:00 2001 From: hadware Date: Tue, 26 May 2020 16:50:03 +0200 Subject: [PATCH] Done with reading the whole db higher-level parameters. Now let's find out how to read the actual audio content --- db_reader.py | 61 ++++++++++++++++++++++++++++++++++++------------ requirements.txt | 1 + 2 files changed, 47 insertions(+), 15 deletions(-) create mode 100644 requirements.txt diff --git a/db_reader.py b/db_reader.py index 6a3957b..7a7f4f0 100644 --- a/db_reader.py +++ b/db_reader.py @@ -1,8 +1,10 @@ import argparse from dataclasses import dataclass, field +from io import SEEK_SET from pathlib import Path from struct import unpack_from, unpack -from typing import List, Tuple, BinaryIO +from typing import List, Tuple, BinaryIO, Dict +import numpy as np MBROLA_VOICES_FOLDER = Path("/usr/share/mbrola/") argparser = argparse.ArgumentParser() @@ -10,6 +12,7 @@ argparser.add_argument("mbrola_db", type=Path, help="Mrbola db name or direct path") PhonemeCode = int +Diphone = Tuple[str,str] def read_str(io: BinaryIO): @@ -26,14 +29,22 @@ def read_str(io: BinaryIO): @dataclass class DiphoneInfo: - left: PhonemeCode - right: PhonemeCode + left: str + right: str pos_wave: int # position in SPEECH_FILE halfseg: int # position of center of diphone pos_pm: int # index in PITCHMARK_FILE nb_frame: int # Number of pitch markers + @property + def left_code(self): + pass + + @property + def right_code(self): + pass + @dataclass class FrameType: @@ -56,10 +67,11 @@ class MbrolaDatabase: max_samples: int = 0 # Size of the diphone buffer= 0 means let me manage it myself magic_header: str = "MBROLA" # Magic header of the database version: str = "2.06" # version of the database - info: List[str] = "" + info: List[str] = None silence_phone: str = "_" # silence symbol in the database - pitch_marks: List[FrameType] = field(default=list) + diphone_table: Dict[Diphone, DiphoneInfo] = field(default_factory=dict) + pitch_marks: np.ndarray = None def read_header(self, db_file: BinaryIO): """Reads the database header""" @@ -78,16 +90,12 @@ class MbrolaDatabase: def read_index(self, db_file: BinaryIO): """Reads the index table of diphones""""" - pass - - def read_pitchmarks(self, db_file: BinaryIO): - i = 0 pm_index = 0 #  cumulative position in pitch mark vector wav_index = 0 #   cumulative position in the waveform database - while (pm_index != self.size_mark and i < self.nb_diphone): - left = read_str(db_file) - right = read_str(db_file) + while pm_index != self.size_mark and i < self.nb_diphone: + left_phone = read_str(db_file) + right_phone = read_str(db_file) half_segment = unpack_from("