# Source code for manim_voiceover.services.openai

import os
import sys
from pathlib import Path

from dotenv import find_dotenv, load_dotenv
from manim import logger

from manim_voiceover.helper import (
    create_dotenv_file,
    prompt_ask_missing_extras,
    remove_bookmarks,
)
from manim_voiceover.services.base import SpeechService

# The OpenAI client is an optional extra: when it is not installed, log an
# installation hint instead of failing at import time.
try:
    import openai
except ImportError:
    logger.error(
        "Missing packages. "
        'Run `pip install "manim-voiceover[openai]"` to use OpenAIService.'
    )


# Load variables from a .env file into the process environment; the search
# for the file starts at the current working directory (``usecwd=True``).
load_dotenv(find_dotenv(usecwd=True))


def create_dotenv_openai():
    """Guide the user through creating a ``.env`` file holding OPENAI_API_KEY.

    Logs a pointer to the service documentation, then asks
    ``create_dotenv_file`` to create the file. This function never returns
    normally: on success it logs a confirmation and terminates the process via
    ``sys.exit()`` so that Manim can be run again with the new file.

    Raises:
        ValueError: If ``create_dotenv_file`` reports that no file was created.
    """
    logger.info(
        "Check out https://voiceover.manim.community/en/stable/services.html "
        "to learn how to create an account and get your subscription key."
    )

    dotenv_created = create_dotenv_file(["OPENAI_API_KEY"])
    if dotenv_created:
        logger.info("The .env file has been created. Please run Manim again.")
        # Raises SystemExit, so the error below is only reached on failure.
        sys.exit()

    raise ValueError(
        "The environment variable OPENAI_API_KEY is not set. Please set it "
        "or create a .env file with the variables."
    )


class OpenAIService(SpeechService):
    """
    Speech service class for OpenAI TTS Service. See the `OpenAI API page
    <https://platform.openai.com/docs/api-reference/audio/createSpeech>`__
    for more information about voices and models.
    """

    def __init__(
        self,
        voice: str = "alloy",
        model: str = "tts-1-hd",
        transcription_model: str = "base",
        **kwargs
    ):
        """
        Args:
            voice (str, optional): The voice to use. See the
                `API page <https://platform.openai.com/docs/api-reference/audio/createSpeech>`__
                for all the available options. Defaults to ``"alloy"``.
            model (str, optional): The TTS model to use. See the
                `API page <https://platform.openai.com/docs/api-reference/audio/createSpeech>`__
                for all the available options. Defaults to ``"tts-1-hd"``.
            transcription_model (str, optional): Forwarded unchanged to
                ``SpeechService.__init__``. Defaults to ``"base"``.
            **kwargs: Additional keyword arguments forwarded to
                ``SpeechService.__init__``.
        """
        prompt_ask_missing_extras("openai", "openai", "OpenAIService")
        self.voice = voice
        self.model = model
        super().__init__(transcription_model=transcription_model, **kwargs)

    def generate_from_text(
        self, text: str, cache_dir: str = None, path: str = None, **kwargs
    ) -> dict:
        """
        Synthesize ``text`` with the OpenAI speech API and save it as audio.

        Args:
            text (str): The text to synthesize. It is passed through
                ``remove_bookmarks`` before being sent to the API.
            cache_dir (str, optional): Directory used for the cache lookup
                and as the parent of the written audio file. Defaults to
                ``self.cache_dir``.
            path (str, optional): Audio file name, joined onto ``cache_dir``.
                Defaults to ``self.get_audio_basename(input_data) + ".mp3"``.
            **kwargs: Only ``speed`` is read (default ``1.0``); it must lie
                between 0.25 and 4.0. Any other key is ignored.

        Returns:
            dict: The cached result when ``self.get_cached_result`` finds
            one; otherwise a dict with the keys ``"input_text"`` (the
            original, unstripped text), ``"input_data"`` and
            ``"original_audio"`` (the audio file name).

        Raises:
            ValueError: If ``speed`` is outside the range 0.25 to 4.0.
        """
        if cache_dir is None:
            cache_dir = self.cache_dir

        speed = kwargs.get("speed", 1.0)
        if not (0.25 <= speed <= 4.0):
            raise ValueError("The speed must be between 0.25 and 4.0.")

        input_text = remove_bookmarks(text)
        # Everything that influences the generated audio goes in here, since
        # this dict is what the cache lookup is keyed on.
        input_data = {
            "input_text": input_text,
            "service": "openai",
            "config": {
                "voice": self.voice,
                "model": self.model,
                "speed": speed,
            },
        }

        cached_result = self.get_cached_result(input_data, cache_dir)
        if cached_result is not None:
            return cached_result

        if path is None:
            audio_path = self.get_audio_basename(input_data) + ".mp3"
        else:
            audio_path = path

        # Checked only after the cache lookup, so cached audio can be reused
        # without an API key. create_dotenv_openai() does not return: it
        # either exits the process or raises ValueError.
        if os.getenv("OPENAI_API_KEY") is None:
            create_dotenv_openai()

        # Calling stream_to_file() on the plain create() response is
        # deprecated in the OpenAI client because it does not actually
        # stream; the streaming-response context manager is the supported way.
        with openai.audio.speech.with_streaming_response.create(
            model=self.model,
            voice=self.voice,
            input=input_text,
            speed=speed,
        ) as response:
            response.stream_to_file(str(Path(cache_dir) / audio_path))

        return {
            "input_text": text,
            "input_data": input_data,
            "original_audio": audio_path,
        }