Files
gpt4free/g4f/Provider/audio/EdgeTTS.py
hlohaus b68b9ff6be feat: add audio generation support for multiple providers
- Added new examples for `client.media.generate` with `PollinationsAI`, `EdgeTTS`, and `Gemini` in `docs/media.md`
- Modified `PollinationsAI.py` to default to `default_audio_model` when audio data is present
- Adjusted `PollinationsAI.py` to conditionally construct message list from `prompt` when media is being generated
- Rearranged `PollinationsAI.py` response handling to yield `save_response_media` after checking for non-JSON content types
- Added support in `EdgeTTS.py` to use default values for `language`, `locale`, and `format` from class attributes
- Improved voice selection logic in `EdgeTTS.py` to fall back to the default locale or language when none is explicitly provided
- Updated `EdgeTTS.py` to yield `AudioResponse` with `text` field included
- Modified `Gemini.py` to support `.ogx` audio generation when `model == "gemini-audio"` or `audio` is passed
- Used `format_image_prompt` in `Gemini.py` to create audio prompt and saved audio file using `synthesize`
- Appended `AudioResponse` to `Gemini.py` for audio generation flow
- Added `save()` method to `Image` class in `stubs.py` to support saving `/media/` files locally
- Changed `client/__init__.py` to fall back to `options["text"]` if `alt` is missing in `Images.create`
- Ensured `AudioResponse` in `copy_images.py` includes the `text` (prompt) field
- Added `Annotated` fallback definition in `api/__init__.py` for compatibility with older Python versions
2025-04-19 06:23:46 +02:00

73 lines
2.7 KiB
Python

from __future__ import annotations
import os
import random
import asyncio
try:
import edge_tts
from edge_tts import VoicesManager
has_edge_tts = True
except ImportError:
has_edge_tts = False
from ...typing import AsyncResult, Messages
from ...providers.response import AudioResponse
from ...image.copy_images import get_filename, get_media_dir, ensure_media_dir
from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
from ..helper import format_image_prompt
class EdgeTTS(AsyncGeneratorProvider, ProviderModelMixin):
    """Text-to-speech provider that synthesizes audio with ``edge_tts``.

    The prompt is spoken with either an explicitly requested voice or a
    randomly chosen voice matching the requested locale/language, saved
    into the media directory, and reported back as an ``AudioResponse``.
    """

    label = "Edge TTS"
    # Mirrors whether the optional ``edge_tts`` import at module top succeeded.
    working = has_edge_tts
    default_language = "en"
    default_locale = "en-US"
    default_format = "mp3"

    @classmethod
    def get_models(cls) -> list[str]:
        """Return the names of all available voices, fetching them once.

        On the first call the voice list is loaded via ``VoicesManager`` and
        cached on the class; ``default_model`` is set to the first voice of
        ``default_locale`` when such a voice exists.

        Note: ``asyncio.run`` cannot be used while an event loop is already
        running in the current thread, so this must be called from
        synchronous code.
        """
        if not cls.models:
            voices = asyncio.run(VoicesManager.create())
            default_voices = voices.find(Locale=cls.default_locale)
            # Guard against an empty match instead of failing with a bare
            # IndexError; the model list itself is still populated.
            if default_voices:
                cls.default_model = default_voices[0]["Name"]
            cls.models = [voice["Name"] for voice in voices.voices]
        return cls.models

    @classmethod
    async def _select_voice(cls, audio: dict) -> str:
        """Pick a random voice name matching the locale/language in *audio*.

        Precedence: an explicit ``locale`` key, then a non-default
        ``language`` (treated as a locale when it contains ``-``), then
        ``default_locale``.

        Raises:
            ValueError: If no voice matches the requested filter.
        """
        manager = await VoicesManager.create()
        # A missing, ``None`` or empty language means "use the default";
        # this avoids ``"-" in None`` raising TypeError.
        language = audio.get("language") or cls.default_language
        if "locale" in audio:
            candidates = manager.find(Locale=audio["locale"])
        elif language != cls.default_language:
            if "-" in language:
                candidates = manager.find(Locale=language)
            else:
                candidates = manager.find(Language=language)
        else:
            candidates = manager.find(Locale=cls.default_locale)
        if not candidates:
            raise ValueError(f"No voices found for language '{audio.get('language')}' and locale '{audio.get('locale')}'.")
        return random.choice(candidates)["Name"]

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        prompt: str = None,
        audio: dict = None,
        **kwargs
    ) -> AsyncResult:
        """Synthesize the prompt to an audio file and yield its location.

        Args:
            model: Voice name to use; ignored when empty or ``"edge-tts"``.
            messages: Conversation passed to ``format_image_prompt`` to
                derive the text when *prompt* is not given.
            proxy: Forwarded to ``edge_tts.Communicate``.
            prompt: Optional explicit text to speak.
            audio: Optional settings. Recognized keys: ``voice``, ``locale``,
                ``language``, ``format``, ``rate``, ``volume``, ``pitch``.
            **kwargs: Accepted for interface compatibility; unused here.

        Yields:
            A single ``AudioResponse`` pointing at ``/media/<filename>``,
            carrying the chosen voice and the spoken text.

        Raises:
            ValueError: If the prompt is empty or no matching voice exists.
        """
        # ``None`` replaces a shared mutable ``{}`` default; an explicit
        # ``audio=None`` from a caller is handled the same way.
        audio = {} if audio is None else audio

        prompt = format_image_prompt(messages, prompt)
        if not prompt:
            raise ValueError("Prompt is empty.")

        # An explicit voice wins; otherwise the model name is used as the
        # voice unless it is just the generic provider alias.
        voice = audio.get("voice", model if model and model != "edge-tts" else None)
        if not voice:
            voice = await cls._select_voice(audio)

        # NOTE(review): the extension is caller-supplied and becomes part of
        # the file name - confirm that get_filename sanitizes it.
        audio_format = audio.get("format") or cls.default_format
        # NOTE(review): default_model is only assigned in get_models(); it may
        # still be unset here if get_models() was never called - verify that
        # get_filename tolerates that.
        filename = get_filename([cls.default_model], prompt, f".{audio_format}", prompt)
        target_path = os.path.join(get_media_dir(), filename)
        ensure_media_dir()

        # Only forward the prosody options the caller actually provided.
        extra_parameters = {param: audio[param] for param in ["rate", "volume", "pitch"] if param in audio}
        communicate = edge_tts.Communicate(prompt, voice=voice, proxy=proxy, **extra_parameters)
        await communicate.save(target_path)
        yield AudioResponse(f"/media/{filename}", voice=voice, text=prompt)