Browse Source

Header parsing and beginning of diphone index table parsing

master
hadware 6 years ago
parent
commit
ea25e428af
  1. 114
      db_reader.py

114
db_reader.py

@ -1,7 +1,8 @@
import argparse import argparse
from dataclasses import dataclass from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import List, Tuple from struct import unpack_from, unpack
from typing import List, Tuple, BinaryIO
MBROLA_VOICES_FOLDER = Path("/usr/share/mbrola/") MBROLA_VOICES_FOLDER = Path("/usr/share/mbrola/")
argparser = argparse.ArgumentParser() argparser = argparse.ArgumentParser()
@ -11,6 +12,18 @@ argparser.add_argument("mbrola_db", type=Path,
PhonemeCode = int PhonemeCode = int
def read_str(io: BinaryIO) -> str:
    """Read a null-terminated string from a binary IO.

    Bytes are consumed one at a time from the current position of ``io``
    up to and including the first null byte; the terminator itself is not
    part of the returned string.

    Args:
        io: Binary stream positioned at the first byte of the string.

    Returns:
        The decoded string, without its null terminator (may be empty).

    Raises:
        EOFError: If the stream ends before a null terminator is found.
    """
    raw = bytearray()
    while True:
        byte = io.read(1)
        if not byte:
            # read() returns b"" at end of stream: the string is truncated.
            raise EOFError(
                "end of stream reached before the string's null terminator"
            )
        if byte == b"\x00":  # null char at the end of string
            break
        raw += byte
    # latin-1 maps every byte to exactly one character, so decoding cannot
    # fail and each byte read yields one character of output.
    # NOTE(review): confirm the encoding actually used by the database.
    return raw.decode("latin-1")
@dataclass @dataclass
class DiphoneInfo: class DiphoneInfo:
left: PhonemeCode left: PhonemeCode
@ -30,22 +43,73 @@ class FrameType:
@dataclass
class MbrolaDatabase:
    """In-memory description of an MBROLA diphone database file.

    Only ``db_path`` is required; every other field starts from a default
    and is overwritten by the ``read_*`` methods when the file is parsed.
    """
    # TODO : init default values as specified in the database_init
    db_path: Path  # name of database
    coding: int = 1  # database format. Might be useless
    freq: int = 0  # Sampling frequency of the database
    mbr_period: int = 0  # Period of the MBR analysis
    nb_diphone: int = 0  # Number of diphones in the database
    size_mark: int = 0  # Size of the pitchmark part
    size_raw: int = 0  # Size of the wave part
    raw_offset: int = 0  # Offset for raw samples in database
    max_frame: int = 0  # Maximum number of frames encountered for a diphone in the dba
    max_samples: int = 0  # Size of the diphone buffer= 0 means let me manage it myself
    magic_header: str = "MBROLA"  # Magic header of the database
    version: str = "2.06"  # version of the database
    # Mutable defaults must go through default_factory so that each instance
    # gets its own fresh list.
    info: List[str] = field(default_factory=list)
    silence_phone: str = "_"  # silence symbol in the database
    # Quoted annotation: it is not evaluated when the class is created.
    pitch_marks: List["FrameType"] = field(default_factory=list)

    def read_header(self, db_file: BinaryIO) -> None:
        """Reads the database header.

        Consumes, in order and little-endian: a 6-byte magic string, a
        5-byte version string, the diphone count (int16), the legacy
        pitchmark size (uint16, followed by an int32 size when the legacy
        value is 0), the wave part size (int32), the sampling frequency
        (int16), the MBR period (uint8) and the coding (uint8).

        Args:
            db_file: Binary stream positioned at the start of the header.
        """
        self.magic_header = db_file.read(6).decode()
        self.version = db_file.read(5).decode()
        self.nb_diphone = unpack_from("<h", db_file.read(2))[0]

        # A legacy 16-bit size of 0 means the real size follows as 32 bits.
        old_size_mark = unpack_from("<H", db_file.read(2))[0]
        if old_size_mark == 0:
            self.size_mark = unpack_from("<i", db_file.read(4))[0]
        else:
            self.size_mark = old_size_mark

        self.size_raw = unpack_from("<i", db_file.read(4))[0]
        self.freq = unpack_from("<h", db_file.read(2))[0]
        self.mbr_period = unpack_from("<B", db_file.read(1))[0]
        self.coding = unpack_from("<B", db_file.read(1))[0]

    def read_index(self, db_file: BinaryIO) -> None:
        """Reads the index table of diphones (not implemented yet)."""
        pass

    def read_pitchmarks(self, db_file: BinaryIO) -> None:
        """Walk the per-diphone records and detect the silence symbol.

        Each record is read as two null-terminated phoneme names followed
        by an int16 and two uint8 values. Reading stops once the cumulated
        frame count equals ``size_mark`` or ``nb_diphone`` records have
        been consumed. The left phoneme of the record that brings the
        cumulated frame count exactly to ``size_mark`` is stored as
        ``silence_phone``.

        Args:
            db_file: Binary stream positioned at the first record.
        """
        i = 0
        pm_index = 0  # cumulative position in pitch mark vector
        wav_index = 0  # cumulative position in the waveform database
        while pm_index != self.size_mark and i < self.nb_diphone:
            left = read_str(db_file)
            right = read_str(db_file)
            # These values are read to advance the stream; they are not
            # stored anywhere yet.
            half_segment = unpack_from("<h", db_file.read(2))[0]
            nb_frames = unpack_from("<B", db_file.read(1))[0]
            nb_wframe = unpack_from("<B", db_file.read(1))[0]

            position_pm = pm_index  # offset of this record before it is added
            pm_index += nb_frames

            if pm_index == self.size_mark:
                self.silence_phone = left
            i += 1

    def read_info(self, db_file: BinaryIO) -> None:
        """Reads the database information section (not implemented yet)."""
        pass

    def read_database(self) -> None:
        """Open ``db_path`` in binary mode and parse each section in file order."""
        with open(self.db_path, "rb") as db_file:
            self.read_header(db_file)
            self.read_index(db_file)
            self.read_pitchmarks(db_file)
            self.read_info(db_file)
if __name__ == "__main__": if __name__ == "__main__":
args = argparser.parse_args() args = argparser.parse_args()
@ -64,6 +128,18 @@ if __name__ == "__main__":
# should read "mbrola" # should read "mbrola"
mbr_db.magic_header = db_file.read(6).decode() mbr_db.magic_header = db_file.read(6).decode()
mbr_db.version = db_file.read(5).decode() mbr_db.version = db_file.read(5).decode()
mbr_db.nb_diphone = int.from_bytes(db_file.read(2), byteorder="little") mbr_db.nb_diphone = unpack_from("<h", db_file)[0]
old_size_mark = unpack_from("<H", db_file)[0]
if old_size_mark == 0:
mbr_db.size_mark = unpack_from("<i", db_file)
else:
mbr_db.size_mark = old_size_mark
mbr_db.size_raw = unpack_from("<i", db_file)
mbr_db.freq = unpack_from("<h", db_file)
mbr_db.mbr_period = unpack_from("<B", db_file)
mbr_db.coding = unpack_from("<B", db_file)
# TODO:
# ReadDatabaseIndex(dba) ||
# !ReadDatabasePitchMark(dba) ||
#  !ReadDatabaseInfo(dba)

Loading…
Cancel
Save