|
|
|
|
@ -1,7 +1,8 @@
|
|
|
|
|
import argparse |
|
|
|
|
from dataclasses import dataclass |
|
|
|
|
from dataclasses import dataclass, field |
|
|
|
|
from pathlib import Path |
|
|
|
|
from typing import List, Tuple |
|
|
|
|
from struct import unpack_from, unpack |
|
|
|
|
from typing import List, Tuple, BinaryIO |
|
|
|
|
|
|
|
|
|
MBROLA_VOICES_FOLDER = Path("/usr/share/mbrola/") |
|
|
|
|
argparser = argparse.ArgumentParser() |
|
|
|
|
@ -11,6 +12,18 @@ argparser.add_argument("mbrola_db", type=Path,
|
|
|
|
|
PhonemeCode = int |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_str(io: BinaryIO) -> str:
    """Read a null-terminated string from a binary IO.

    Bytes are consumed one at a time from ``io`` up to and including the
    first null byte; the terminator itself is not part of the result.

    Args:
        io: Binary stream positioned on the first byte of the string.

    Returns:
        The text that precedes the null terminator (``""`` when the very
        first byte is the terminator).

    Raises:
        EOFError: If the stream ends before a null byte is found.
    """
    raw = bytearray()
    while True:
        byte = io.read(1)
        if not byte:
            # read() returns b"" at end of stream: the string was never
            # terminated, so the data is truncated or misaligned.
            raise EOFError("end of stream reached before the null terminator")
        if byte == b"\x00":  # null char at the end of string
            break
        raw += byte
    # latin-1 maps each byte to exactly one character, so decoding cannot
    # fail. NOTE(review): assumes single-byte text -- confirm the encoding
    # actually used for strings in the database.
    return raw.decode("latin-1")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass |
|
|
|
|
class DiphoneInfo: |
|
|
|
|
left: PhonemeCode |
|
|
|
|
@ -30,22 +43,73 @@ class FrameType:
|
|
|
|
|
@dataclass
class MbrolaDatabase:
    """Description of an MBROLA database file and the code that parses it.

    Only ``db_path`` is required. Every other field starts with a
    placeholder default and is overwritten while :meth:`read_database`
    parses the file.
    """

    db_path: Path  # name of database

    coding: int = 1  # database format. Might be useless
    freq: int = 0  # Sampling frequency of the database
    mbr_period: int = 0  # Period of the MBR analysis
    nb_diphone: int = 0  # Number of diphones in the database
    size_mark: int = 0  # Size of the pitchmark part
    size_raw: int = 0  # Size of the wave part
    raw_offset: int = 0  # Offset for raw samples in database
    max_frame: int = 0  # Maximum number of frames encountered for a diphone in the dba
    max_samples: int = 0  # Size of the diphone buffer= 0 means let me manage it myself
    magic_header: str = "MBROLA"  # Magic header of the database
    version: str = "2.06"  # version of the database
    # default_factory gives every instance its own empty list.
    info: List[str] = field(default_factory=list)
    silence_phone: str = "_"  # silence symbol in the database

    # default_factory (not default) so the value is a fresh list per
    # instance rather than the ``list`` type object.
    pitch_marks: List[FrameType] = field(default_factory=list)

    def read_header(self, db_file: BinaryIO):
        """Reads the database header.

        Fields are consumed from ``db_file`` in this order, all
        little-endian: 6 bytes of magic text, 5 bytes of version text,
        int16 diphone count, uint16 pitchmark size (when it is 0 the real
        size follows as an int32), int32 wave size, int16 sampling
        frequency, uint8 MBR period, uint8 coding.

        Args:
            db_file: Binary stream positioned at the start of the header.
        """
        self.magic_header = db_file.read(6).decode()
        self.version = db_file.read(5).decode()
        self.nb_diphone = unpack_from("<h", db_file.read(2))[0]

        # A zero in the 16-bit slot means the size is stored in the
        # following 32-bit field instead.
        old_size_mark = unpack_from("<H", db_file.read(2))[0]
        if old_size_mark == 0:
            self.size_mark = unpack_from("<i", db_file.read(4))[0]
        else:
            self.size_mark = old_size_mark

        self.size_raw = unpack_from("<i", db_file.read(4))[0]
        self.freq = unpack_from("<h", db_file.read(2))[0]
        self.mbr_period = unpack_from("<B", db_file.read(1))[0]
        self.coding = unpack_from("<B", db_file.read(1))[0]

    def read_index(self, db_file: BinaryIO):
        """Reads the index table of diphones"""
        # Not implemented yet: nothing is consumed from db_file.
        pass

    def read_pitchmarks(self, db_file: BinaryIO):
        """Walk the per-diphone records and pick up the silence symbol.

        Each record is read as two null-terminated names (left, right),
        a little-endian int16 and two unsigned bytes. Frame counts are
        accumulated; the left name of the record whose frames bring the
        running total exactly to ``size_mark`` becomes ``silence_phone``.
        At most ``nb_diphone`` records are read.

        NOTE(review): the layout parsed here (names, half segment, frame
        counts) looks like diphone index data rather than the pitch marks
        themselves -- confirm the method naming.

        Args:
            db_file: Binary stream positioned on the first record.
        """
        pm_index = 0  # cumulative position in pitch mark vector
        for _ in range(self.nb_diphone):
            if pm_index == self.size_mark:
                break

            left = read_str(db_file)
            # The remaining fields are not used yet but must still be
            # read to keep the stream aligned on the next record.
            right = read_str(db_file)
            half_segment = unpack_from("<h", db_file.read(2))[0]
            nb_frames = unpack_from("<B", db_file.read(1))[0]
            nb_wframe = unpack_from("<B", db_file.read(1))[0]

            pm_index += nb_frames

            if pm_index == self.size_mark:
                self.silence_phone = left

    def read_info(self, db_file: BinaryIO):
        """Placeholder for reading the info section; not implemented yet."""
        pass

    def read_database(self):
        """Open ``db_path`` in binary mode and parse it section by section.

        The sections are read in file order: header, index, pitch marks,
        info. The file is closed when parsing ends or fails.
        """
        with open(self.db_path, "rb") as db_file:
            self.read_header(db_file)
            self.read_index(db_file)
            self.read_pitchmarks(db_file)
            self.read_info(db_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
|
args = argparser.parse_args() |
|
|
|
|
@ -64,6 +128,18 @@ if __name__ == "__main__":
|
|
|
|
|
# should read "mbrola" |
|
|
|
|
mbr_db.magic_header = db_file.read(6).decode() |
|
|
|
|
mbr_db.version = db_file.read(5).decode() |
|
|
|
|
mbr_db.nb_diphone = int.from_bytes(db_file.read(2), byteorder="little") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
mbr_db.nb_diphone = unpack_from("<h", db_file)[0] |
|
|
|
|
old_size_mark = unpack_from("<H", db_file)[0] |
|
|
|
|
if old_size_mark == 0: |
|
|
|
|
mbr_db.size_mark = unpack_from("<i", db_file) |
|
|
|
|
else: |
|
|
|
|
mbr_db.size_mark = old_size_mark |
|
|
|
|
mbr_db.size_raw = unpack_from("<i", db_file) |
|
|
|
|
mbr_db.freq = unpack_from("<h", db_file) |
|
|
|
|
mbr_db.mbr_period = unpack_from("<B", db_file) |
|
|
|
|
mbr_db.coding = unpack_from("<B", db_file) |
|
|
|
|
|
|
|
|
|
# TODO: |
|
|
|
|
# ReadDatabaseIndex(dba) || |
|
|
|
|
# !ReadDatabasePitchMark(dba) || |
|
|
|
|
# !ReadDatabaseInfo(dba) |
|
|
|
|
|