The pyttsx3 module offers a cross-platform interface for offline text-to-speech synthesis, leveraging underlying system engines such as SAPI5 on Windows, NSSpeechSynthesizer on macOS, and espeak on Linux. This dependency on native drivers ensures functionality without requiring an active internet connection, making it suitable for localized applications.
Developers can adjust synthesis parameters such as speaking rate, volume, and the active voice; pitch control is only available where the underlying driver supports it. Additionally, the library allows iteration through the voice profiles installed on the host system.
Core Implementation
A basic integration involves initializing the driver and queueing text for playback. Wrapping this logic within a class provides better state management for larger applications.
import pyttsx3
class VoiceEngine:
    """Small wrapper that owns a single pyttsx3 engine and speaks text with it."""

    def __init__(self):
        # One engine instance is created up front and reused for every call.
        self._driver = pyttsx3.init()

    def synthesize(self, content):
        """Queue ``content`` for speech and run the engine until it is done.

        Falsy input (``None`` or an empty string) is ignored: nothing is
        queued and the engine is not run.
        """
        if content:
            engine = self._driver
            engine.say(content)
            engine.runAndWait()
if __name__ == "__main__":
    # Smoke test: build an engine wrapper and speak a fixed status message.
    VoiceEngine().synthesize("System ready. Initialization complete.")
This structure supports multiple languages depending on the operating system's installed voice packs. For instance, Chinese characters can be passed directly to the synthesize method provided the corresponding voice data is present.
Desktop Interface Integration
To create a user-friendly tool, the tkinter library can be combined with pyttsx3. The following implementation defines a main application class that manages the window state and event handling.
import tkinter as tk
from tkinter import ttk
import pyttsx3
import datetime
class TTSApp(tk.Tk):
    """Main window that previews typed text as speech or exports it to a WAV file.

    The window holds a multi-line text box, a file-name entry, "Export Audio"
    and "Preview" buttons, and a status label that reports the outcome of the
    last action (green for success, red for errors).
    """

    def __init__(self):
        super().__init__()
        self.title("Offline Speech Converter")
        self.geometry("400x500")
        self.engine = pyttsx3.init()
        self._setup_ui()

    def _setup_ui(self):
        """Create and lay out every widget of the window."""
        self.input_box = tk.Text(self, height=10, width=40)
        self.input_box.pack(pady=10)

        self.file_name_entry = tk.Entry(self)
        self.file_name_entry.pack(pady=5)
        self.file_name_entry.insert(0, "audio_output")

        btn_frame = tk.Frame(self)
        btn_frame.pack(pady=10)
        save_btn = tk.Button(btn_frame, text="Export Audio", command=self._export_file)
        save_btn.pack(side=tk.LEFT, padx=5)
        play_btn = tk.Button(btn_frame, text="Preview", command=self._play_preview)
        play_btn.pack(side=tk.LEFT, padx=5)

        self.status_label = tk.Label(self, text="Ready", fg="green")
        self.status_label.pack(pady=5)

    def _get_text(self):
        """Return the text box contents without the final newline Tk appends."""
        return self.input_box.get("1.0", "end-1c")

    @staticmethod
    def _resolve_output_path(raw_name):
        """Turn the user-supplied file name into the ``.wav`` path to write.

        Surrounding whitespace is removed. A blank name falls back to a
        ``YYYYmmddHHMMSS`` timestamp, and the ``.wav`` suffix is appended only
        when the name does not already end with it (case-insensitive), so
        "clip.wav" no longer becomes "clip.wav.wav".
        """
        name = raw_name.strip()
        if not name:
            name = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        if name.lower().endswith(".wav"):
            return name
        return f"{name}.wav"

    def _set_status(self, message, *, ok=True):
        """Show ``message`` in the status label, green on success and red on error."""
        self.status_label.config(text=message, fg="green" if ok else "red")

    def _export_file(self):
        """Synthesize the text box contents into a WAV file and report the result."""
        content = self._get_text()
        # Same guard as the preview: there is nothing to synthesize from blank text.
        if not content.strip():
            self._set_status("Error: no text to export", ok=False)
            return

        target = self._resolve_output_path(self.file_name_entry.get())
        try:
            self.engine.save_to_file(content, target)
            self.engine.runAndWait()
        except Exception as e:  # UI boundary: surface any engine failure to the user
            self._set_status(f"Error: {e}", ok=False)
        else:
            self._set_status(f"Saved: {target}")

    def _play_preview(self):
        """Speak the text box contents aloud; blank text is ignored."""
        content = self._get_text()
        if not content.strip():
            return
        try:
            self.engine.say(content)
            self.engine.runAndWait()
        except Exception as e:  # UI boundary: report instead of failing silently
            self._set_status(f"Error: {e}", ok=False)
if __name__ == "__main__":
    # Create the window and hand control to the Tk event loop.
    TTSApp().mainloop()
Advanced Configuration Controls
For granular control over the output, properties such as speech rate and volume can be exposed via the interface. The application can also query available voices and populate a selection menu.
import tkinter as tk
from tkinter import ttk
import pyttsx3
class AdvancedTTS(tk.Tk):
    """Window that exposes speech rate, volume and voice selection before export.

    The form values are validated and applied to the engine each time
    "Generate & Save" is pressed; the outcome is shown in a status label.
    """

    # Accepted ranges, matching the hints shown next to the entry fields.
    RATE_RANGE = (50, 300)
    VOLUME_RANGE = (0.0, 1.0)

    def __init__(self):
        super().__init__()
        self.title("Advanced TTS Controller")
        self.engine = pyttsx3.init()
        self.voice_ids = [v.id for v in self.engine.getProperty('voices')]
        self._build_controls()

    def _build_controls(self):
        """Create the text area, the three setting controls, the button and status label."""
        tk.Label(self, text="Content:").pack()
        self.text_area = tk.Text(self, height=8, width=50)
        self.text_area.pack()

        tk.Label(self, text="Speed (50-300):").pack()
        self.rate_var = tk.StringVar(value="150")
        tk.Entry(self, textvariable=self.rate_var).pack()

        tk.Label(self, text="Volume (0.0-1.0):").pack()
        self.vol_var = tk.StringVar(value="1.0")
        tk.Entry(self, textvariable=self.vol_var).pack()

        tk.Label(self, text="Voice Profile:").pack()
        self.voice_combo = ttk.Combobox(self, values=self.voice_ids, state="readonly")
        # Selecting index 0 of an empty list raises TclError, so only
        # preselect when the engine reported at least one voice.
        if self.voice_ids:
            self.voice_combo.current(0)
        self.voice_combo.pack()

        tk.Button(self, text="Generate & Save", command=self._process_save).pack(pady=10)

        self.status_label = tk.Label(self, text="Ready", fg="green")
        self.status_label.pack(pady=5)

    @staticmethod
    def _parse_bounded(raw, convert, low, high, label):
        """Convert ``raw`` with ``convert`` and check that it lies in [low, high].

        Raises:
            ValueError: with a user-readable message naming ``label`` when the
                text is not a valid number or falls outside the range.
        """
        try:
            value = convert(raw.strip())
        except ValueError:
            raise ValueError(f"{label} must be a number") from None
        # The chained comparison is also False for NaN, so NaN is rejected.
        if not low <= value <= high:
            raise ValueError(f"{label} must be between {low} and {high}")
        return value

    def _set_status(self, message, *, ok=True):
        """Show ``message`` in the status label, green on success and red on error."""
        self.status_label.config(text=message, fg="green" if ok else "red")

    def _process_save(self):
        """Validate the form, apply the settings and export the text to a WAV file."""
        # Imported locally because the import block that accompanies this
        # class does not include datetime.
        import datetime

        text = self.text_area.get("1.0", "end-1c")
        if not text.strip():
            self._set_status("Error: no text to export", ok=False)
            return

        rate_low, rate_high = self.RATE_RANGE
        vol_low, vol_high = self.VOLUME_RANGE
        try:
            rate = self._parse_bounded(self.rate_var.get(), int, rate_low, rate_high, "Speed")
            volume = self._parse_bounded(self.vol_var.get(), float, vol_low, vol_high, "Volume")
        except ValueError as exc:
            self._set_status(f"Error: {exc}", ok=False)
            return

        self.engine.setProperty('rate', rate)
        self.engine.setProperty('volume', volume)
        voice_id = self.voice_combo.get()
        # The combobox is empty when no voices are installed; keep the
        # engine's current voice in that case.
        if voice_id:
            self.engine.setProperty('voice', voice_id)

        output_name = f"customspeech_{int(datetime.datetime.now().timestamp())}.wav"
        try:
            self.engine.save_to_file(text, output_name)
            self.engine.runAndWait()
        except Exception as exc:  # UI boundary: surface any engine failure to the user
            self._set_status(f"Error: {exc}", ok=False)
        else:
            self._set_status(f"Saved: {output_name}")
This configuration allows users to tailor the audio output specifically to their requirements before exporting. Properties are read from the form and applied each time the save command runs.