Rumah > Artikel > pembangunan bahagian belakang > Bagaimana untuk melaksanakan alat sintesis pertuturan Python
TTS (Text To Speech) ialah teknologi sintesis pertuturan yang membolehkan mesin memainkan teks input dalam bentuk pertuturan, sehingga menghasilkan kesan seolah-olah mesin itu bercakap.
TTS dibahagikan kepada pemprosesan pertuturan dan sintesis pertuturan. Teks input mula-mula dikenali oleh mesin, dan kemudian sintesis pertuturan dilakukan berdasarkan perpustakaan pertuturan. Terdapat banyak antara muka TTS yang boleh dipanggil, seperti antara muka sintesis pertuturan Baidu Smart Cloud. Microsoft juga menyediakan antara muka TTS dalam sistem Windows, yang boleh dipanggil untuk melaksanakan fungsi sintesis pertuturan TTS luar talian.
Artikel ini akan menggunakan perpustakaan pyttsx3 sebagai demonstrasi untuk menulis alat sintesis pertuturan.
Pasang PyQt5 dan alatan reka bentuk GUInya
# 安装PyQt5 pip install PyQt5 # 安装PyQt5设计器 pip install PyQt5Designer
Editor yang digunakan dalam artikel ini ialah VSCode, bukan PyCharm. Mungkin terdapat perbezaan dalam cara menggunakan PyQt5; ia boleh dikonfigurasikan mengikut situasi sebenar semasa digunakan.
Pasang pyttsx3
# Install the pyttsx3 package
pip install pyttsx3
Anda boleh merujuk kepada rajah di bawah untuk mereka bentuk antara muka GUI yang ringkas. Memandangkan artikel ini hanyalah contoh yang berfungsi, estetika antara muka tidak dipertimbangkan.
Antara muka harus mempunyai kotak input teks untuk memasukkan teks untuk ditukar kepada pertuturan dan butang main untuk mencetuskan kaedah main balik suara. Kelajuan pertuturan, kelantangan dan bahasa boleh dipilih mengikut keperluan.
Menggunakan alat reka bentuk PyQt5, anda boleh menjana kod UI (XML) berikut berdasarkan antara muka GUI yang dikonfigurasikan di atas:
<?xml version="1.0" encoding="UTF-8"?>
<!--
 Qt Designer form for the speech synthesizer window.
 Rows of the vertical layout, top to bottom:
   1. caption label + text edit (tbx_text) holding the text to speak
   2. speech-rate slider (slider_rate, maximum 300) + value label (label_rate)
   3. volume slider (slider_volumn, maximum 100) + value label (label_volumn)
   4. language radio buttons (rbtn_zh checked by default, rbtn_en)
   5. empty spacer label + play button (btn_play)
-->
<ui version="4.0">
 <class>Form</class>
 <widget class="QWidget" name="Form">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>313</width>
    <height>284</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>语音合成器</string>
  </property>
  <property name="windowIcon">
   <iconset>
    <normaloff>voice.ico</normaloff>voice.ico</iconset>
  </property>
  <widget class="QWidget" name="verticalLayoutWidget">
   <property name="geometry">
    <rect>
     <x>10</x>
     <y>10</y>
     <width>291</width>
     <height>261</height>
    </rect>
   </property>
   <layout class="QVBoxLayout" name="verticalLayout">
    <property name="spacing">
     <number>20</number>
    </property>
    <item>
     <layout class="QHBoxLayout" name="horizontalLayout_2">
      <item>
       <widget class="QLabel" name="label">
        <property name="text">
         <string>播报文本</string>
        </property>
        <property name="alignment">
         <set>Qt::AlignJustify|Qt::AlignTop</set>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QTextEdit" name="tbx_text"/>
      </item>
     </layout>
    </item>
    <item>
     <layout class="QHBoxLayout" name="horizontalLayout_4">
      <item>
       <widget class="QLabel" name="label_3">
        <property name="text">
         <string>语速</string>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QSlider" name="slider_rate">
        <property name="maximum">
         <number>300</number>
        </property>
        <property name="orientation">
         <enum>Qt::Horizontal</enum>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QLabel" name="label_rate">
        <property name="minimumSize">
         <size>
          <width>30</width>
          <height>0</height>
         </size>
        </property>
        <property name="text">
         <string>0</string>
        </property>
        <property name="alignment">
         <set>Qt::AlignCenter</set>
        </property>
       </widget>
      </item>
     </layout>
    </item>
    <item>
     <layout class="QHBoxLayout" name="horizontalLayout_3">
      <item>
       <widget class="QLabel" name="label_2">
        <property name="text">
         <string>音量</string>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QSlider" name="slider_volumn">
        <property name="maximum">
         <number>100</number>
        </property>
        <property name="orientation">
         <enum>Qt::Horizontal</enum>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QLabel" name="label_volumn">
        <property name="minimumSize">
         <size>
          <width>30</width>
          <height>0</height>
         </size>
        </property>
        <property name="text">
         <string>0</string>
        </property>
        <property name="alignment">
         <set>Qt::AlignCenter</set>
        </property>
       </widget>
      </item>
     </layout>
    </item>
    <item>
     <layout class="QHBoxLayout" name="horizontalLayout">
      <item>
       <widget class="QLabel" name="label_4">
        <property name="text">
         <string>选择语言</string>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QRadioButton" name="rbtn_zh">
        <property name="text">
         <string>中文</string>
        </property>
        <property name="checked">
         <bool>true</bool>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QRadioButton" name="rbtn_en">
        <property name="text">
         <string>英文</string>
        </property>
       </widget>
      </item>
     </layout>
    </item>
    <item>
     <layout class="QHBoxLayout" name="horizontalLayout_5">
      <item>
       <widget class="QLabel" name="label_5">
        <property name="minimumSize">
         <size>
          <width>60</width>
          <height>0</height>
         </size>
        </property>
        <property name="text">
         <string/>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QPushButton" name="btn_play">
        <property name="minimumSize">
         <size>
          <width>0</width>
          <height>30</height>
         </size>
        </property>
        <property name="text">
         <string>播放</string>
        </property>
       </widget>
      </item>
     </layout>
    </item>
   </layout>
  </widget>
 </widget>
 <resources/>
 <connections/>
</ui>
Akhir sekali, menggunakan alat antara muka PyQt5 (pyuic5), anda boleh menjana kelas borang Python berikut berdasarkan kod UI (XML) di atas:
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'd:\Program\VSCode\Python\TTS_PyQT\tts_form.ui'
#
# Created by: PyQt5 UI code generator 5.15.7
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_Form(object):
    '''
    Generated UI builder for the speech synthesizer form.

    setupUi() creates the widgets and layouts as attributes of this object;
    retranslateUi() assigns every user-visible text.
    '''

    def setupUi(self, Form):
        '''
        Build the widget tree on ``Form``.

        Form: the widget that receives the title, icon and child widgets.
        '''
        Form.setObjectName("Form")
        Form.resize(313, 284)
        # Window icon, loaded from the relative path ./voice.ico
        icon = QtGui.QIcon()
        icon.addPixmap(
            QtGui.QPixmap("./voice.ico"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        Form.setWindowIcon(icon)
        # Container widget with a fixed geometry that hosts the vertical
        # layout; every row below is added to that layout in order.
        self.verticalLayoutWidget = QtWidgets.QWidget(Form)
        self.verticalLayoutWidget.setGeometry(QtCore.QRect(10, 10, 291, 261))
        self.verticalLayoutWidget.setObjectName("verticalLayoutWidget")
        self.verticalLayout = QtWidgets.QVBoxLayout(self.verticalLayoutWidget)
        self.verticalLayout.setContentsMargins(0, 0, 0, 0)
        self.verticalLayout.setSpacing(20)
        self.verticalLayout.setObjectName("verticalLayout")
        # Row 1: caption label + text edit
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.label = QtWidgets.QLabel(self.verticalLayoutWidget)
        self.label.setAlignment(QtCore.Qt.AlignJustify | QtCore.Qt.AlignTop)
        self.label.setObjectName("label")
        self.horizontalLayout_2.addWidget(self.label)
        self.tbx_text = QtWidgets.QTextEdit(self.verticalLayoutWidget)
        self.tbx_text.setObjectName("tbx_text")
        self.horizontalLayout_2.addWidget(self.tbx_text)
        self.verticalLayout.addLayout(self.horizontalLayout_2)
        # Row 2: caption label + rate slider (0-300) + value label
        self.horizontalLayout_4 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_4.setObjectName("horizontalLayout_4")
        self.label_3 = QtWidgets.QLabel(self.verticalLayoutWidget)
        self.label_3.setObjectName("label_3")
        self.horizontalLayout_4.addWidget(self.label_3)
        self.slider_rate = QtWidgets.QSlider(self.verticalLayoutWidget)
        self.slider_rate.setMaximum(300)
        self.slider_rate.setOrientation(QtCore.Qt.Horizontal)
        self.slider_rate.setObjectName("slider_rate")
        self.horizontalLayout_4.addWidget(self.slider_rate)
        self.label_rate = QtWidgets.QLabel(self.verticalLayoutWidget)
        self.label_rate.setMinimumSize(QtCore.QSize(30, 0))
        self.label_rate.setAlignment(QtCore.Qt.AlignCenter)
        self.label_rate.setObjectName("label_rate")
        self.horizontalLayout_4.addWidget(self.label_rate)
        self.verticalLayout.addLayout(self.horizontalLayout_4)
        # Row 3: caption label + volume slider (0-100) + value label
        self.horizontalLayout_3 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_3.setObjectName("horizontalLayout_3")
        self.label_2 = QtWidgets.QLabel(self.verticalLayoutWidget)
        self.label_2.setObjectName("label_2")
        self.horizontalLayout_3.addWidget(self.label_2)
        self.slider_volumn = QtWidgets.QSlider(self.verticalLayoutWidget)
        self.slider_volumn.setMaximum(100)
        self.slider_volumn.setOrientation(QtCore.Qt.Horizontal)
        self.slider_volumn.setObjectName("slider_volumn")
        self.horizontalLayout_3.addWidget(self.slider_volumn)
        self.label_volumn = QtWidgets.QLabel(self.verticalLayoutWidget)
        self.label_volumn.setMinimumSize(QtCore.QSize(30, 0))
        self.label_volumn.setAlignment(QtCore.Qt.AlignCenter)
        self.label_volumn.setObjectName("label_volumn")
        self.horizontalLayout_3.addWidget(self.label_volumn)
        self.verticalLayout.addLayout(self.horizontalLayout_3)
        # Row 4: caption label + two language radio buttons (rbtn_zh is
        # checked initially)
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.label_4 = QtWidgets.QLabel(self.verticalLayoutWidget)
        self.label_4.setObjectName("label_4")
        self.horizontalLayout.addWidget(self.label_4)
        self.rbtn_zh = QtWidgets.QRadioButton(self.verticalLayoutWidget)
        self.rbtn_zh.setChecked(True)
        self.rbtn_zh.setObjectName("rbtn_zh")
        self.horizontalLayout.addWidget(self.rbtn_zh)
        self.rbtn_en = QtWidgets.QRadioButton(self.verticalLayoutWidget)
        self.rbtn_en.setObjectName("rbtn_en")
        self.horizontalLayout.addWidget(self.rbtn_en)
        self.verticalLayout.addLayout(self.horizontalLayout)
        # Row 5: empty label (minimum width 60) + play button
        self.horizontalLayout_5 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_5.setObjectName("horizontalLayout_5")
        self.label_5 = QtWidgets.QLabel(self.verticalLayoutWidget)
        self.label_5.setMinimumSize(QtCore.QSize(60, 0))
        self.label_5.setText("")
        self.label_5.setObjectName("label_5")
        self.horizontalLayout_5.addWidget(self.label_5)
        self.btn_play = QtWidgets.QPushButton(self.verticalLayoutWidget)
        self.btn_play.setMinimumSize(QtCore.QSize(0, 30))
        self.btn_play.setObjectName("btn_play")
        self.horizontalLayout_5.addWidget(self.btn_play)
        self.verticalLayout.addLayout(self.horizontalLayout_5)

        self.retranslateUi(Form)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        '''Set the window title and all widget texts through Qt's translate().'''
        _translate = QtCore.QCoreApplication.translate
        # Window title: "Speech synthesizer"
        Form.setWindowTitle(_translate("Form", "语音合成器"))
        # "Text to speak"
        self.label.setText(_translate("Form", "播报文本"))
        # "Speech rate"
        self.label_3.setText(_translate("Form", "语速"))
        self.label_rate.setText(_translate("Form", "0"))
        # "Volume"
        self.label_2.setText(_translate("Form", "音量"))
        self.label_volumn.setText(_translate("Form", "0"))
        # "Select language" / "Chinese" / "English"
        self.label_4.setText(_translate("Form", "选择语言"))
        self.rbtn_zh.setText(_translate("Form", "中文"))
        self.rbtn_en.setText(_translate("Form", "英文"))
        # "Play"
        self.btn_play.setText(_translate("Form", "播放"))
Jika anda menyalin kod ini terus, ikon mungkin hilang. Ini memerlukan pengubahsuaian konfigurasi ikon mengikut situasi sebenar dan menambah fail ikon ico untuk digunakan.
Mula-mula kita perlu memulakan dan mendapatkan objek enjin pertuturan untuk sintesis pertuturan.
# tts对象 engine = pyttsx3.init()
Kita boleh mengubah suai sifat objek sintesis pertuturan melalui kaedah setProperty objek:
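| Nama sifat | Penerangan |
| --- | --- |
| rate | Kelajuan pertuturan sebagai integer, dalam perkataan seminit |
| volume | Kelantangan, dalam julat [0.0, 1.0] |
| voice | Pengecam rentetan (string identifier) bagi suara |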
Kod kelas alat suara adalah seperti berikut; sila rujuk komen dalam kod:
import pyttsx3


class VoiceEngine():
    '''
    Text-to-speech helper wrapping a pyttsx3 engine.

    Rate, Volume and VoiceID are plain settings stored on the instance; they
    are pushed to the engine every time Say() is called.
    '''

    def __init__(self):
        '''Create the pyttsx3 engine and store the default settings.'''
        # TTS engine object
        self.__engine = pyttsx3.init()
        # Speech rate (forwarded to the pyttsx3 'rate' property)
        self.__rate = 150
        # Volume as a percentage, 0-100 (scaled to 0.0-1.0 in Say)
        self.__volume = 100
        # Index into the engine's voice list; the original author uses
        # 0 for Chinese and 1 for English
        self.__voice = 0

    @property
    def Rate(self):
        '''Speech rate forwarded to the engine's 'rate' property.'''
        return self.__rate

    @Rate.setter
    def Rate(self, value):
        self.__rate = value

    @property
    def Volume(self):
        '''Volume as a percentage in the range 0-100.'''
        return self.__volume

    @Volume.setter
    def Volume(self, value):
        self.__volume = value

    @property
    def VoiceID(self):
        '''Voice index: 0 -- Chinese; 1 -- English.'''
        return self.__voice

    @VoiceID.setter
    def VoiceID(self, value):
        self.__voice = value

    def Say(self, text):
        '''
        Speak ``text`` with the current settings.

        The call returns only after the engine's runAndWait() has finished.
        '''
        self.__engine.setProperty('rate', self.__rate)
        # Volume is stored on a 0-100 scale (the default is 100), while
        # pyttsx3 documents 'volume' as a float in [0.0, 1.0]: scale and
        # clamp before handing it to the engine.
        volume = min(max(self.__volume / 100, 0.0), 1.0)
        self.__engine.setProperty('volume', volume)
        # Fetch the list of available voices and select one by index.  When
        # the index does not exist on this machine the engine keeps its
        # current voice instead of raising IndexError.
        voices = self.__engine.getProperty('voices')
        if 0 <= self.__voice < len(voices):
            self.__engine.setProperty('voice', voices[self.__voice].id)
        # To write the speech to a file instead:
        # self.__engine.save_to_file(text, 'test.mp3')
        self.__engine.say(text)
        self.__engine.runAndWait()
        self.__engine.stop()
Kita boleh mencipta kelas borang yang mewarisi kelas borang PyQt5 yang baru kita jana, dan mendaftarkan fungsi panggil balik untuk acara seret (gelangsar) dan klik pada borang. Pada masa yang sama, kita mencipta satu contoh (instance) kelas alat suara, yang digunakan untuk melaksanakan operasi suara apabila acara yang ditentukan dicetuskan.
import sys
import _thread as th

from PyQt5.QtWidgets import QMainWindow, QApplication

from Ui_tts_form import Ui_Form


class MainWindow(QMainWindow, Ui_Form):
    '''
    Main window: connects the generated form widgets to a VoiceEngine.
    '''

    # Sample texts placed in the text box when a language is selected.
    _SAMPLE_TEXT_ZH = '床前明月光,疑似地上霜。\n举头望明月,低头思故乡。'
    _SAMPLE_TEXT_EN = 'Hello World'

    def __init__(self, parent=None):
        '''Build the form, create the TTS helper and connect the signals.'''
        super(MainWindow, self).__init__(parent)
        self.setupUi(self)
        # TTS helper instance
        self.engine = VoiceEngine()
        # True while a playback is in progress
        self.__isPlaying = False
        # Initial text
        self.tbx_text.setText(self._SAMPLE_TEXT_ZH)
        # Show the slider values in their labels
        self.slider_rate.valueChanged.connect(self.setRateTextValue)
        self.slider_volumn.valueChanged.connect(self.setVolumnTextValue)
        # Initial slider positions come from the engine settings
        self.slider_rate.setValue(self.engine.Rate)
        self.slider_volumn.setValue(self.engine.Volume)
        # Radio button selection events
        self.rbtn_zh.toggled.connect(self.onSelectVoice_zh)
        self.rbtn_en.toggled.connect(self.onSelectVoice_en)
        # Play button click event
        self.btn_play.clicked.connect(self.onPlayButtonClick)

    def setRateTextValue(self):
        '''Copy the rate slider value to its label and to the engine.'''
        value = self.slider_rate.value()
        self.label_rate.setText(str(value))
        self.engine.Rate = value

    def setVolumnTextValue(self):
        '''
        Copy the volume slider value to its label and to the engine.

        The label shows the value divided by 100; the engine receives the
        raw slider value.
        '''
        value = self.slider_volumn.value()
        self.label_volumn.setText(str(value / 100))
        self.engine.Volume = value

    def onSelectVoice_zh(self, checked=True):
        '''
        Switch to the Chinese voice and sample text.

        checked: state carried by the toggled signal.  The signal also fires
        for the button that becomes unchecked; that call is ignored so the
        result does not depend on the order in which the two buttons emit.
        '''
        if not checked:
            return
        self.tbx_text.setText(self._SAMPLE_TEXT_ZH)
        self.engine.VoiceID = 0

    def onSelectVoice_en(self, checked=True):
        '''
        Switch to the English voice and sample text.

        checked: state carried by the toggled signal; the unchecked
        notification is ignored.
        '''
        if not checked:
            return
        self.tbx_text.setText(self._SAMPLE_TEXT_EN)
        self.engine.VoiceID = 1

    def __speak(self, text):
        '''Speak ``text`` and always clear the playing flag afterwards.'''
        try:
            self.engine.Say(text)
        finally:
            # Reset even when Say() raises, otherwise the play button would
            # stay disabled for the rest of the session.
            self.__isPlaying = False

    def playVoice(self, text=None):
        '''
        Speak synchronously unless a playback is already running.

        text: text to speak; when None the current content of the text box
        is used.
        '''
        if self.__isPlaying:
            return
        self.__isPlaying = True
        self.__speak(self.tbx_text.toPlainText() if text is None else text)

    def onPlayButtonClick(self):
        '''
        Play button click handler.

        Playback runs in a new thread so the window stays responsive while
        the engine is speaking.  The playing flag is checked and set here,
        and the text is read here, because this handler runs in the GUI
        thread: widgets must not be touched from the worker thread, and
        doing the check before the thread starts prevents two quick clicks
        from starting two playbacks.
        '''
        if self.__isPlaying:
            return
        self.__isPlaying = True
        text = self.tbx_text.toPlainText()
        try:
            th.start_new_thread(self.__speak, (text,))
        except RuntimeError:
            # The thread could not be started: release the flag again.
            self.__isPlaying = False
            raise
import sys
import _thread as th

import pyttsx3
from PyQt5.QtWidgets import QMainWindow, QApplication

from Ui_tts_form import Ui_Form


class VoiceEngine():
    '''
    Text-to-speech helper wrapping a pyttsx3 engine.

    Rate, Volume and VoiceID are plain settings stored on the instance; they
    are pushed to the engine every time Say() is called.
    '''

    def __init__(self):
        '''Create the pyttsx3 engine and store the default settings.'''
        # TTS engine object
        self.__engine = pyttsx3.init()
        # Speech rate (forwarded to the pyttsx3 'rate' property)
        self.__rate = 150
        # Volume as a percentage, 0-100 (scaled to 0.0-1.0 in Say)
        self.__volume = 100
        # Index into the engine's voice list; the original author uses
        # 0 for Chinese and 1 for English
        self.__voice = 0

    @property
    def Rate(self):
        '''Speech rate forwarded to the engine's 'rate' property.'''
        return self.__rate

    @Rate.setter
    def Rate(self, value):
        self.__rate = value

    @property
    def Volume(self):
        '''Volume as a percentage in the range 0-100.'''
        return self.__volume

    @Volume.setter
    def Volume(self, value):
        self.__volume = value

    @property
    def VoiceID(self):
        '''Voice index: 0 -- Chinese; 1 -- English.'''
        return self.__voice

    @VoiceID.setter
    def VoiceID(self, value):
        self.__voice = value

    def Say(self, text):
        '''
        Speak ``text`` with the current settings.

        The call returns only after the engine's runAndWait() has finished.
        '''
        self.__engine.setProperty('rate', self.__rate)
        # Volume is stored on a 0-100 scale (it is fed by the 0-100 volume
        # slider), while pyttsx3 documents 'volume' as a float in
        # [0.0, 1.0]: scale and clamp before handing it to the engine.
        volume = min(max(self.__volume / 100, 0.0), 1.0)
        self.__engine.setProperty('volume', volume)
        # The 'voice' property takes the voice's string identifier, so pass
        # the .id of the selected entry, not the Voice object itself.  When
        # the index does not exist on this machine the engine keeps its
        # current voice instead of raising IndexError.
        voices = self.__engine.getProperty('voices')
        if 0 <= self.__voice < len(voices):
            self.__engine.setProperty('voice', voices[self.__voice].id)
        # To write the speech to a file instead:
        # self.__engine.save_to_file(text, 'test.mp3')
        self.__engine.say(text)
        self.__engine.runAndWait()
        self.__engine.stop()


class MainWindow(QMainWindow, Ui_Form):
    '''
    Main window: connects the generated form widgets to a VoiceEngine.
    '''

    # Sample texts placed in the text box when a language is selected.
    _SAMPLE_TEXT_ZH = '床前明月光,疑似地上霜。\n举头望明月,低头思故乡。'
    _SAMPLE_TEXT_EN = 'Hello World'

    def __init__(self, parent=None):
        '''Build the form, create the TTS helper and connect the signals.'''
        super(MainWindow, self).__init__(parent)
        self.setupUi(self)
        # TTS helper instance
        self.engine = VoiceEngine()
        # True while a playback is in progress
        self.__isPlaying = False
        # Initial text
        self.tbx_text.setText(self._SAMPLE_TEXT_ZH)
        # Show the slider values in their labels
        self.slider_rate.valueChanged.connect(self.setRateTextValue)
        self.slider_volumn.valueChanged.connect(self.setVolumnTextValue)
        # Initial slider positions come from the engine settings
        self.slider_rate.setValue(self.engine.Rate)
        self.slider_volumn.setValue(self.engine.Volume)
        # Radio button selection events
        self.rbtn_zh.toggled.connect(self.onSelectVoice_zh)
        self.rbtn_en.toggled.connect(self.onSelectVoice_en)
        # Play button click event
        self.btn_play.clicked.connect(self.onPlayButtonClick)

    def setRateTextValue(self):
        '''Copy the rate slider value to its label and to the engine.'''
        value = self.slider_rate.value()
        self.label_rate.setText(str(value))
        self.engine.Rate = value

    def setVolumnTextValue(self):
        '''
        Copy the volume slider value to its label and to the engine.

        The label shows the value divided by 100; the engine receives the
        raw slider value (0-100).
        '''
        value = self.slider_volumn.value()
        self.label_volumn.setText(str(value / 100))
        self.engine.Volume = value

    def onSelectVoice_zh(self, checked=True):
        '''
        Switch to the Chinese voice and sample text.

        checked: state carried by the toggled signal.  The signal also fires
        for the button that becomes unchecked; that call is ignored so the
        result does not depend on the order in which the two buttons emit.
        '''
        if not checked:
            return
        self.tbx_text.setText(self._SAMPLE_TEXT_ZH)
        self.engine.VoiceID = 0

    def onSelectVoice_en(self, checked=True):
        '''
        Switch to the English voice and sample text.

        checked: state carried by the toggled signal; the unchecked
        notification is ignored.
        '''
        if not checked:
            return
        self.tbx_text.setText(self._SAMPLE_TEXT_EN)
        self.engine.VoiceID = 1

    def __speak(self, text):
        '''Speak ``text`` and always clear the playing flag afterwards.'''
        try:
            self.engine.Say(text)
        finally:
            # Reset even when Say() raises, otherwise the play button would
            # stay disabled for the rest of the session.
            self.__isPlaying = False

    def playVoice(self, text=None):
        '''
        Speak synchronously unless a playback is already running.

        text: text to speak; when None the current content of the text box
        is used.
        '''
        if self.__isPlaying:
            return
        self.__isPlaying = True
        self.__speak(self.tbx_text.toPlainText() if text is None else text)

    def onPlayButtonClick(self):
        '''
        Play button click handler.

        Playback runs in a new thread so the window stays responsive while
        the engine is speaking.  The playing flag is checked and set here,
        and the text is read here, because this handler runs in the GUI
        thread: widgets must not be touched from the worker thread, and
        doing the check before the thread starts prevents two quick clicks
        from starting two playbacks.
        '''
        if self.__isPlaying:
            return
        self.__isPlaying = True
        text = self.tbx_text.toPlainText()
        try:
            th.start_new_thread(self.__speak, (text,))
        except RuntimeError:
            # The thread could not be started: release the flag again.
            self.__isPlaying = False
            raise


if __name__ == "__main__":
    # Entry point: create the application, show the window and run the
    # Qt event loop until the window is closed.
    app = QApplication(sys.argv)
    form = MainWindow()
    form.show()
    sys.exit(app.exec_())
Atas ialah kandungan terperinci Bagaimana untuk melaksanakan alat sintesis pertuturan Python. Untuk maklumat lanjut, sila ikut artikel berkaitan lain di laman web China PHP!