|
|
|
|
@ -1,8 +1,10 @@
|
|
|
|
|
import argparse |
|
|
|
|
from dataclasses import dataclass, field |
|
|
|
|
from io import SEEK_SET |
|
|
|
|
from pathlib import Path |
|
|
|
|
from struct import unpack_from, unpack |
|
|
|
|
from typing import List, Tuple, BinaryIO |
|
|
|
|
from typing import List, Tuple, BinaryIO, Dict |
|
|
|
|
import numpy as np |
|
|
|
|
|
|
|
|
|
MBROLA_VOICES_FOLDER = Path("/usr/share/mbrola/") |
|
|
|
|
argparser = argparse.ArgumentParser() |
|
|
|
|
@ -10,6 +12,7 @@ argparser.add_argument("mbrola_db", type=Path,
|
|
|
|
|
help="Mrbola db name or direct path") |
|
|
|
|
|
|
|
|
|
PhonemeCode = int |
|
|
|
|
Diphone = Tuple[str,str] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_str(io: BinaryIO): |
|
|
|
|
@ -26,14 +29,22 @@ def read_str(io: BinaryIO):
|
|
|
|
|
|
|
|
|
|
@dataclass
class DiphoneInfo:
    """Index entry describing one diphone of the database."""

    # Left and right phoneme strings of the diphone. Each field is declared
    # exactly once: a repeated annotation for the same name is silently
    # overridden by the last one, so only the effective ``str`` type is kept.
    left: str
    right: str

    pos_wave: int  # position in SPEECH_FILE
    halfseg: int  # position of center of diphone
    pos_pm: int  # index in PITCHMARK_FILE
    nb_frame: int  # Number of pitch markers

    @property
    def left_code(self):
        """Placeholder that is not implemented yet; evaluates to ``None``."""
        # NOTE(review): presumably meant to return the code of ``left`` - confirm.
        pass

    @property
    def right_code(self):
        """Placeholder that is not implemented yet; evaluates to ``None``."""
        # NOTE(review): presumably meant to return the code of ``right`` - confirm.
        pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass |
|
|
|
|
class FrameType: |
|
|
|
|
@ -56,10 +67,11 @@ class MbrolaDatabase:
|
|
|
|
|
max_samples: int = 0 # Size of the diphone buffer= 0 means let me manage it myself |
|
|
|
|
magic_header: str = "MBROLA" # Magic header of the database |
|
|
|
|
version: str = "2.06" # version of the database |
|
|
|
|
info: List[str] = "" |
|
|
|
|
info: List[str] = None |
|
|
|
|
silence_phone: str = "_" # silence symbol in the database |
|
|
|
|
|
|
|
|
|
pitch_marks: List[FrameType] = field(default=list) |
|
|
|
|
diphone_table: Dict[Diphone, DiphoneInfo] = field(default_factory=dict) |
|
|
|
|
pitch_marks: np.ndarray = None |
|
|
|
|
|
|
|
|
|
def read_header(self, db_file: BinaryIO): |
|
|
|
|
"""Reads the database header""" |
|
|
|
|
@ -78,16 +90,12 @@ class MbrolaDatabase:
|
|
|
|
|
|
|
|
|
|
def read_index(self, db_file: BinaryIO): |
|
|
|
|
"""Reads the index table of diphones""""" |
|
|
|
|
pass |
|
|
|
|
|
|
|
|
|
def read_pitchmarks(self, db_file: BinaryIO): |
|
|
|
|
|
|
|
|
|
i = 0 |
|
|
|
|
pm_index = 0 # cumulative position in pitch mark vector |
|
|
|
|
wav_index = 0 # cumulative position in the waveform database |
|
|
|
|
while (pm_index != self.size_mark and i < self.nb_diphone): |
|
|
|
|
left = read_str(db_file) |
|
|
|
|
right = read_str(db_file) |
|
|
|
|
while pm_index != self.size_mark and i < self.nb_diphone: |
|
|
|
|
left_phone = read_str(db_file) |
|
|
|
|
right_phone = read_str(db_file) |
|
|
|
|
half_segment = unpack_from("<h", db_file.read(2))[0] |
|
|
|
|
nb_frames = unpack_from("<B", db_file.read(1))[0] |
|
|
|
|
nb_wframe = unpack_from("<B", db_file.read(1))[0] |
|
|
|
|
@ -96,12 +104,35 @@ class MbrolaDatabase:
|
|
|
|
|
pm_index += nb_frames |
|
|
|
|
|
|
|
|
|
if pm_index == self.size_mark: |
|
|
|
|
self.silence_phone = left |
|
|
|
|
|
|
|
|
|
self.silence_phone = left_phone |
|
|
|
|
|
|
|
|
|
new_wav_index = wav_index |
|
|
|
|
wav_index = nb_wframe * self.mbr_period |
|
|
|
|
new_diph = DiphoneInfo(left=left_phone, right=right_phone, |
|
|
|
|
pos_wave=new_wav_index, |
|
|
|
|
halfseg=half_segment, |
|
|
|
|
pos_pm=position_pm, |
|
|
|
|
nb_frame=nb_frames) |
|
|
|
|
self.diphone_table[(left_phone, right_phone)] = new_diph |
|
|
|
|
|
|
|
|
|
# keep track of the phoneme with the biggest number of frames |
|
|
|
|
if self.max_frame < nb_wframe: |
|
|
|
|
self.max_frame = nb_wframe |
|
|
|
|
i += 1 |
|
|
|
|
|
|
|
|
|
def read_pitchmarks(self, db_file: BinaryIO):
    """Read the packed pitch-mark bytes located at the current file position.

    Reads ``(self.size_mark + 3) // 4`` bytes from ``db_file``, stores them
    in ``self.pitch_marks`` as a read-only ``uint8`` array (one element per
    byte read), and records the file position that follows them in
    ``self.raw_offset``.

    Raises:
        EOFError: if the file ends before all expected bytes are available.
    """
    round_size = (self.size_mark + 3) // 4
    raw = db_file.read(round_size)
    if len(raw) != round_size:
        raise EOFError(
            f"expected {round_size} pitch mark bytes, got {len(raw)}"
        )
    # np.array() takes (object, dtype): handing it a struct format string and
    # the raw bytes raises TypeError. frombuffer interprets the bytes directly.
    self.pitch_marks = np.frombuffer(raw, dtype=np.uint8)
    self.raw_offset = db_file.tell()
|
|
|
|
|
|
|
|
|
def read_info(self, db_file: BinaryIO):
    """Collect the strings stored after the raw data section.

    Seeks to ``self.raw_offset + self.size_raw`` (absolute position) and
    appends every string returned by ``read_str`` to ``self.info`` until
    ``read_str`` raises ``EOFError``.
    """
    # ``whence`` must be passed positionally: the seek() of binary file
    # objects (BufferedReader, BytesIO) does not accept keyword arguments.
    db_file.seek(self.raw_offset + self.size_raw, SEEK_SET)
    self.info = []
    # NOTE(review): termination relies on read_str raising EOFError at the end
    # of the file - confirm against its implementation.
    while True:
        try:
            self.info.append(read_str(db_file))
        except EOFError:
            break
|
|
|
|
|
|
|
|
|
def read_database(self): |
|
|
|
|
with open(self.db_path, "rb") as db_file: |
|
|
|
|
|