mirror of
				https://github.com/xtekky/gpt4free.git
				synced 2025-10-31 03:26:22 +08:00 
			
		
		
		
	 b68b9ff6be
			
		
	
	b68b9ff6be
	
	
	
		
			
			- Added new examples for `client.media.generate` with `PollinationsAI`, `EdgeTTS`, and `Gemini` in `docs/media.md`
			- Modified `PollinationsAI.py` to default to `default_audio_model` when audio data is present
			- Adjusted `PollinationsAI.py` to conditionally construct message list from `prompt` when media is being generated
			- Rearranged `PollinationsAI.py` response handling to yield `save_response_media` after checking for non-JSON content types
			- Added support in `EdgeTTS.py` to use default values for `language`, `locale`, and `format` from class attributes
			- Improved voice selection logic in `EdgeTTS.py` to fall back to default locale or language when not explicitly provided
			- Updated `EdgeTTS.py` to yield `AudioResponse` with `text` field included
			- Modified `Gemini.py` to support `.ogx` audio generation when `model == "gemini-audio"` or `audio` is passed
			- Used `format_image_prompt` in `Gemini.py` to create audio prompt and saved audio file using `synthesize`
			- Appended `AudioResponse` to `Gemini.py` for audio generation flow
			- Added `save()` method to `Image` class in `stubs.py` to support saving `/media/` files locally
			- Changed `client/__init__.py` to fall back to `options["text"]` if `alt` is missing in `Images.create`
			- Ensured `AudioResponse` in `copy_images.py` includes the `text` (prompt) field
			- Added `Annotated` fallback definition in `api/__init__.py` for compatibility with older Python versions
		
			
				
	
	
		
			73 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			73 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import annotations
 | |
| 
 | |
| import os
 | |
| import random
 | |
| import asyncio
 | |
| 
 | |
| try:
 | |
|     import edge_tts
 | |
|     from edge_tts import VoicesManager
 | |
|     has_edge_tts = True
 | |
| except ImportError:
 | |
|     has_edge_tts = False
 | |
| 
 | |
| from ...typing import AsyncResult, Messages
 | |
| from ...providers.response import AudioResponse
 | |
| from ...image.copy_images import get_filename, get_media_dir, ensure_media_dir
 | |
| from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
 | |
| from ..helper import format_image_prompt
 | |
| 
 | |
class EdgeTTS(AsyncGeneratorProvider, ProviderModelMixin):
    """Text-to-speech provider that renders a prompt to an audio file via ``edge_tts``.

    The provider is only marked as working when the optional ``edge_tts``
    package could be imported at module load time.
    """

    label = "Edge TTS"
    # Mirrors the result of the guarded ``import edge_tts`` at module level.
    working = has_edge_tts
    # Fallbacks used when the ``audio`` options omit the corresponding key.
    default_language = "en"
    default_locale = "en-US"
    default_format = "mp3"

    @classmethod
    def get_models(cls) -> list[str]:
        """Return the names of all voices, fetching the list on first use.

        The first call also sets ``cls.default_model`` to the first voice
        found for ``cls.default_locale`` and caches the names in
        ``cls.models``.

        NOTE: this uses ``asyncio.run`` and therefore must not be called
        from a thread that already has a running event loop.
        """
        if not cls.models:
            voices = asyncio.run(VoicesManager.create())
            cls.default_model = voices.find(Locale=cls.default_locale)[0]["Name"]
            cls.models = [voice["Name"] for voice in voices.voices]
        return cls.models

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        prompt: str = None,
        audio: dict = None,
        **kwargs
    ) -> AsyncResult:
        """Synthesize speech for the prompt and yield one ``AudioResponse``.

        Args:
            model: Voice name to use, unless it is empty or the generic
                ``"edge-tts"`` alias, in which case a voice is selected
                from the ``audio`` options.
            messages: Conversation passed to ``format_image_prompt`` to
                derive the text when ``prompt`` is not sufficient.
            proxy: Optional proxy forwarded to ``edge_tts.Communicate``.
            prompt: Optional explicit text, passed to ``format_image_prompt``.
            audio: Optional options dict. Recognized keys: ``voice``,
                ``locale``, ``language``, ``format``, ``rate``, ``volume``
                and ``pitch``. Keys that are missing or set to ``None``
                fall back to the class defaults.
            **kwargs: Accepted for interface compatibility; unused here.

        Yields:
            AudioResponse: Points at ``/media/<filename>`` and carries the
            chosen voice and the spoken text.

        Raises:
            ValueError: If the resulting prompt is empty, or if no voice
                matches the requested locale/language.
        """
        # ``None`` replaces the former shared mutable ``{}`` default.
        audio = {} if audio is None else audio

        prompt = format_image_prompt(messages, prompt)
        if not prompt:
            raise ValueError("Prompt is empty.")

        # An explicit ``audio["voice"]`` wins; otherwise use ``model`` unless
        # it is the generic provider alias.
        voice = audio.get("voice", model if model and model != "edge-tts" else None)
        if not voice:
            manager = await VoicesManager.create()
            # Treat ``None``/empty values like missing keys so that a
            # caller passing ``{"language": None}`` gets the defaults
            # instead of a ``TypeError`` on the ``"-" in ...`` check.
            locale = audio.get("locale")
            language = audio.get("language") or cls.default_language
            if locale:
                candidates = manager.find(Locale=locale)
            elif language != cls.default_language:
                # A value such as "de-DE" is a locale, "de" a bare language.
                if "-" in language:
                    candidates = manager.find(Locale=language)
                else:
                    candidates = manager.find(Language=language)
            else:
                candidates = manager.find(Locale=cls.default_locale)
            if not candidates:
                raise ValueError(f"No voices found for language '{audio.get('language')}' and locale '{audio.get('locale')}'.")
            voice = random.choice(candidates)["Name"]

        # Named ``audio_format`` to avoid shadowing the ``format`` builtin.
        audio_format = audio.get("format", cls.default_format)
        # NOTE(review): the filename tag uses ``cls.default_model``, which this
        # class assigns only in ``get_models()`` -- confirm whether the
        # selected ``voice`` was intended here instead.
        filename = get_filename([cls.default_model], prompt, f".{audio_format}", prompt)
        target_path = os.path.join(get_media_dir(), filename)
        ensure_media_dir()

        # Forward only the tuning options the caller actually supplied.
        extra_parameters = {param: audio[param] for param in ("rate", "volume", "pitch") if param in audio}
        communicate = edge_tts.Communicate(prompt, voice=voice, proxy=proxy, **extra_parameters)

        await communicate.save(target_path)
        yield AudioResponse(f"/media/{filename}", voice=voice, text=prompt)