diff --git a/docs/async_client.md b/docs/async_client.md index d157b773..c6fd9494 100644 --- a/docs/async_client.md +++ b/docs/async_client.md @@ -21,6 +21,7 @@ The G4F AsyncClient API is designed to be compatible with the OpenAI API, making - [Using a Vision Model](#using-a-vision-model) - **[Transcribing Audio with Chat Completions](#transcribing-audio-with-chat-completions)** *(New Section)* - [Image Generation](#image-generation) + - **[Video Generation](#video-generation)** *(New Section)* - [Advanced Usage](#advanced-usage) - [Conversation Memory](#conversation-memory) - [Search Tool Support](#search-tool-support) @@ -327,6 +328,46 @@ asyncio.run(main()) --- +### Video Generation + +The G4F `AsyncClient` also supports **video generation** through supported providers like `HuggingFaceMedia`. You can retrieve the list of available video models and generate videos from prompts. + +**Example: Generate a video using a prompt** + +```python +import asyncio +from g4f.client import AsyncClient +from g4f.Provider import HuggingFaceMedia + +async def main(): + client = AsyncClient( + provider=HuggingFaceMedia, + api_key="hf_***" # Your API key here + ) + + # Get available video models + video_models = client.models.get_video() + print("Available Video Models:", video_models) + + # Generate video + result = await client.media.generate( + model=video_models[0], + prompt="G4F AI technology is the best in the world.", + response_format="url" + ) + + print("Generated Video URL:", result.data[0].url) + +asyncio.run(main()) +``` + +#### Explanation +- **Client Initialization**: An `AsyncClient` is initialized using the `HuggingFaceMedia` provider with an API key. +- **Model Discovery**: `client.models.get_video()` fetches a list of supported video models. +- **Video Generation**: A prompt is submitted to generate a video using `await client.media.generate(...)`. +- **Output**: The result includes a URL to the generated video, accessed via `result.data[0].url`. 
+ +> Make sure your selected provider supports media generation and your API key has appropriate permissions. ## Advanced Usage diff --git a/g4f/Provider/Blackbox.py b/g4f/Provider/Blackbox.py index c7158b38..1cc7fb42 100644 --- a/g4f/Provider/Blackbox.py +++ b/g4f/Provider/Blackbox.py @@ -16,9 +16,9 @@ from ..requests.raise_for_status import raise_for_status from .base_provider import AsyncGeneratorProvider, ProviderModelMixin from ..image import to_data_uri from ..cookies import get_cookies_dir -from .helper import format_prompt, format_image_prompt +from .helper import format_image_prompt from ..providers.response import JsonConversation, ImageResponse -from ..errors import ModelNotSupportedError +from ..tools.media import merge_media from .. import debug class Conversation(JsonConversation): @@ -488,7 +488,7 @@ class Blackbox(AsyncGeneratorProvider, ProviderModelMixin): "filePath": f"/{image_name}", "contents": to_data_uri(image) } - for image, image_name in media + for image, image_name in merge_media(media, messages) ], "fileText": "", "title": "" diff --git a/g4f/Provider/Copilot.py b/g4f/Provider/Copilot.py index 7aa0f81f..5de06840 100644 --- a/g4f/Provider/Copilot.py +++ b/g4f/Provider/Copilot.py @@ -24,8 +24,9 @@ from .openai.har_file import get_headers, get_har_files from ..typing import CreateResult, Messages, MediaListType from ..errors import MissingRequirementsError, NoValidHarFileError, MissingAuthError from ..requests.raise_for_status import raise_for_status -from ..providers.response import BaseConversation, JsonConversation, RequestLogin, Parameters, ImageResponse +from ..providers.response import BaseConversation, JsonConversation, RequestLogin, ImageResponse from ..providers.asyncio import get_running_loop +from ..tools.media import merge_media from ..requests import get_nodriver from ..image import to_bytes, is_accepted_format from .helper import get_last_user_message @@ -142,17 +143,18 @@ class Copilot(AbstractProvider, ProviderModelMixin): 
debug.log(f"Copilot: Use conversation: {conversation_id}") uploaded_images = [] - if media is not None: - for image, _ in media: - data = to_bytes(image) + media, _ = [(None, None), *merge_media(media, messages)].pop() + if media: + if not isinstance(media, str): + data = to_bytes(media) response = session.post( "https://copilot.microsoft.com/c/api/attachments", headers={"content-type": is_accepted_format(data)}, data=data ) raise_for_status(response) - uploaded_images.append({"type":"image", "url": response.json().get("url")}) - break + media = response.json().get("url") + uploaded_images.append({"type":"image", "url": media}) wss = session.ws_connect(cls.websocket_url) # if clarity_token is not None: diff --git a/g4f/Provider/PollinationsAI.py b/g4f/Provider/PollinationsAI.py index aea92c3d..3c4202e0 100644 --- a/g4f/Provider/PollinationsAI.py +++ b/g4f/Provider/PollinationsAI.py @@ -11,13 +11,14 @@ from aiohttp import ClientSession from .helper import filter_none, format_image_prompt from .base_provider import AsyncGeneratorProvider, ProviderModelMixin from ..typing import AsyncResult, Messages, MediaListType -from ..image import to_data_uri, is_data_an_audio, to_input_audio +from ..image import is_data_an_audio from ..errors import ModelNotFoundError from ..requests.raise_for_status import raise_for_status from ..requests.aiohttp import get_connector from ..image.copy_images import save_response_media from ..image import use_aspect_ratio from ..providers.response import FinishReason, Usage, ToolCalls, ImageResponse +from ..tools.media import render_messages from .. 
import debug DEFAULT_HEADERS = { @@ -285,32 +286,15 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin): if response_format and response_format.get("type") == "json_object": json_mode = True - if media and messages: - last_message = messages[-1].copy() - image_content = [ - { - "type": "input_audio", - "input_audio": to_input_audio(media_data, filename) - } - if is_data_an_audio(media_data, filename) else { - "type": "image_url", - "image_url": {"url": to_data_uri(media_data)} - } - for media_data, filename in media - ] - last_message["content"] = image_content + ([{"type": "text", "text": last_message["content"]}] if isinstance(last_message["content"], str) else image_content) - messages[-1] = last_message - async with ClientSession(headers=DEFAULT_HEADERS, connector=get_connector(proxy=proxy)) as session: if model in cls.audio_models: - #data["voice"] = random.choice(cls.audio_models[model]) url = cls.text_api_endpoint stream = False else: url = cls.openai_endpoint extra_parameters = {param: kwargs[param] for param in extra_parameters if param in kwargs} data = filter_none(**{ - "messages": messages, + "messages": list(render_messages(messages, media)), "model": model, "temperature": temperature, "presence_penalty": presence_penalty, @@ -324,7 +308,7 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin): }) async with session.post(url, json=data) as response: await raise_for_status(response) - async for chunk in save_response_media(response, messages[-1]["content"], [model]): + async for chunk in save_response_media(response, format_image_prompt(messages), [model]): yield chunk return if response.headers["content-type"].startswith("text/plain"): diff --git a/g4f/Provider/hf/HuggingChat.py b/g4f/Provider/hf/HuggingChat.py index 7e10f4e3..b315dfa3 100644 --- a/g4f/Provider/hf/HuggingChat.py +++ b/g4f/Provider/hf/HuggingChat.py @@ -24,6 +24,7 @@ from ...requests import get_args_from_nodriver, DEFAULT_HEADERS from 
...requests.raise_for_status import raise_for_status from ...providers.response import JsonConversation, ImageResponse, Sources, TitleGeneration, Reasoning, RequestLogin from ...cookies import get_cookies +from ...tools.media import merge_media from .models import default_model, default_vision_model, fallback_models, image_models, model_aliases from ... import debug @@ -146,13 +147,12 @@ class HuggingChat(AsyncAuthedProvider, ProviderModelMixin): } data = CurlMime() data.addpart('data', data=json.dumps(settings, separators=(',', ':'))) - if media is not None: - for image, filename in media: - data.addpart( - "files", - filename=f"base64;{filename}", - data=base64.b64encode(to_bytes(image)) - ) + for image, filename in merge_media(media, messages): + data.addpart( + "files", + filename=f"base64;{filename}", + data=base64.b64encode(to_bytes(image)) + ) response = session.post( f'{cls.url}/conversation/{conversationId}', diff --git a/g4f/Provider/hf/HuggingFaceMedia.py b/g4f/Provider/hf/HuggingFaceMedia.py index 72ffaedf..e091f3a4 100644 --- a/g4f/Provider/hf/HuggingFaceMedia.py +++ b/g4f/Provider/hf/HuggingFaceMedia.py @@ -142,20 +142,30 @@ class HuggingFaceMedia(AsyncGeneratorProvider, ProviderModelMixin): } else: extra_data = use_aspect_ratio(extra_data, "1:1" if aspect_ratio is None else aspect_ratio) - if provider_key == "fal-ai": - url = f"{api_base}/{provider_id}" + url = f"{api_base}/{provider_id}" + data = { + "prompt": prompt, + **extra_data + } + if provider_key == "fal-ai" and task == "text-to-image": + if aspect_ratio is None or aspect_ratio == "1:1": + image_size = "square_hd" + elif aspect_ratio == "16:9": + image_size = "landscape_hd" + elif aspect_ratio == "9:16": + image_size = "portrait_16_9" + else: + image_size = extra_data # width, height data = { - "prompt": prompt, - "image_size": "square_hd", - **extra_data + "image_size": image_size, + **data } + elif provider_key == "novita": + url = f"{api_base}/v3/hf/{provider_id}" elif provider_key == 
"replicate": url = f"{api_base}/v1/models/{provider_id}/predictions" data = { - "input": { - "prompt": prompt, - **extra_data - } + "input": data } elif provider_key in ("hf-inference", "hf-free"): api_base = "https://api-inference.huggingface.co" @@ -171,9 +181,8 @@ class HuggingFaceMedia(AsyncGeneratorProvider, ProviderModelMixin): url = f"{api_base}/v1/images/generations" data = { "response_format": "url", - "prompt": prompt, "model": provider_id, - **extra_data + **data } async with StreamSession( @@ -193,7 +202,7 @@ class HuggingFaceMedia(AsyncGeneratorProvider, ProviderModelMixin): return provider_info, chunk result = await response.json() if "video" in result: - return provider_info, VideoResponse(result["video"]["url"], prompt) + return provider_info, VideoResponse(result.get("video").get("url", result.get("video").get("url")), prompt)#video_url elif task == "text-to-image": return provider_info, ImageResponse([item["url"] for item in result.get("images", result.get("data"))], prompt) elif task == "text-to-video": diff --git a/g4f/Provider/hf/__init__.py b/g4f/Provider/hf/__init__.py index 66839427..0a791166 100644 --- a/g4f/Provider/hf/__init__.py +++ b/g4f/Provider/hf/__init__.py @@ -20,7 +20,7 @@ class HuggingFace(AsyncGeneratorProvider, ProviderModelMixin): supports_message_history = True @classmethod - def get_models(cls) -> list[str]: + def get_models(cls, **kwargs) -> list[str]: if not cls.models: cls.models = HuggingFaceInference.get_models() cls.image_models = HuggingFaceInference.image_models diff --git a/g4f/Provider/needs_auth/Gemini.py b/g4f/Provider/needs_auth/Gemini.py index 83c72981..7b5239c7 100644 --- a/g4f/Provider/needs_auth/Gemini.py +++ b/g4f/Provider/needs_auth/Gemini.py @@ -27,6 +27,7 @@ from ...requests import get_nodriver from ...errors import MissingAuthError from ...image import to_bytes from ...cookies import get_cookies_dir +from ...tools.media import merge_media from ..base_provider import AsyncGeneratorProvider, 
ProviderModelMixin from ..helper import format_prompt, get_cookies, get_last_user_message from ... import debug @@ -186,7 +187,7 @@ class Gemini(AsyncGeneratorProvider, ProviderModelMixin): cls.start_auto_refresh() ) - uploads = None if media is None else await cls.upload_images(base_connector, media) + uploads = await cls.upload_images(base_connector, merge_media(media, messages)) async with ClientSession( cookies=cls._cookies, headers=REQUEST_HEADERS, diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py index f9209586..19e50d6b 100644 --- a/g4f/Provider/needs_auth/OpenaiChat.py +++ b/g4f/Provider/needs_auth/OpenaiChat.py @@ -25,7 +25,8 @@ from ...requests import get_nodriver from ...image import ImageRequest, to_image, to_bytes, is_accepted_format from ...errors import MissingAuthError, NoValidHarFileError from ...providers.response import JsonConversation, FinishReason, SynthesizeData, AuthResult, ImageResponse -from ...providers.response import Sources, TitleGeneration, RequestLogin, Parameters, Reasoning +from ...providers.response import Sources, TitleGeneration, RequestLogin, Reasoning +from ...tools.media import merge_media from ..helper import format_cookies, get_last_user_message from ..openai.models import default_model, default_image_model, models, image_models, text_models from ..openai.har_file import get_request_config @@ -187,8 +188,6 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): await raise_for_status(response, "Get download url failed") image_data["download_url"] = (await response.json())["download_url"] return ImageRequest(image_data) - if not media: - return return [await upload_image(image, image_name) for image, image_name in media] @classmethod @@ -330,7 +329,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin): cls._update_request_args(auth_result, session) await raise_for_status(response) try: - image_requests = None if media is None else await cls.upload_images(session, 
auth_result, media) + image_requests = await cls.upload_images(session, auth_result, merge_media(media, messages)) except Exception as e: debug.error("OpenaiChat: Upload image failed") debug.error(e) diff --git a/g4f/Provider/template/OpenaiTemplate.py b/g4f/Provider/template/OpenaiTemplate.py index eb253bc6..f5960f20 100644 --- a/g4f/Provider/template/OpenaiTemplate.py +++ b/g4f/Provider/template/OpenaiTemplate.py @@ -7,8 +7,8 @@ from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin, RaiseErr from ...typing import Union, AsyncResult, Messages, MediaListType from ...requests import StreamSession, raise_for_status from ...providers.response import FinishReason, ToolCalls, Usage, ImageResponse +from ...tools.media import render_messages from ...errors import MissingAuthError, ResponseError -from ...image import to_data_uri, is_data_an_audio, to_input_audio from ... import debug class OpenaiTemplate(AsyncGeneratorProvider, ProviderModelMixin, RaiseErrorMixin): @@ -97,27 +97,9 @@ class OpenaiTemplate(AsyncGeneratorProvider, ProviderModelMixin, RaiseErrorMixin yield ImageResponse([image["url"] for image in data["data"]], prompt) return - if media is not None and messages: - if not model and hasattr(cls, "default_vision_model"): - model = cls.default_vision_model - last_message = messages[-1].copy() - image_content = [ - { - "type": "input_audio", - "input_audio": to_input_audio(media_data, filename) - } - if is_data_an_audio(media_data, filename) else { - "type": "image_url", - "image_url": {"url": to_data_uri(media_data)} - } - for media_data, filename in media - ] - last_message["content"] = image_content + ([{"type": "text", "text": last_message["content"]}] if isinstance(last_message["content"], str) else image_content) - - messages[-1] = last_message extra_parameters = {key: kwargs[key] for key in extra_parameters if key in kwargs} data = filter_none( - messages=messages, + messages=list(render_messages(messages, media)), model=model, 
temperature=temperature, max_tokens=max_tokens, diff --git a/g4f/client/__init__.py b/g4f/client/__init__.py index 33b795a9..5c2f892b 100644 --- a/g4f/client/__init__.py +++ b/g4f/client/__init__.py @@ -19,7 +19,7 @@ from ..providers.asyncio import to_sync_generator from ..Provider.needs_auth import BingCreateImages, OpenaiAccount from ..tools.run_tools import async_iter_run_tools, iter_run_tools from .stubs import ChatCompletion, ChatCompletionChunk, Image, ImagesResponse, UsageModel, ToolCallModel -from .image_models import MediaModels +from .models import ClientModels from .types import IterResponse, ImageProvider, Client as BaseClient from .service import get_model_and_provider, convert_to_provider from .helper import find_stop, filter_json, filter_none, safe_aclose @@ -269,7 +269,7 @@ class Client(BaseClient): self.chat: Chat = Chat(self, provider) if image_provider is None: image_provider = provider - self.models: MediaModels = MediaModels(self, image_provider) + self.models: ClientModels = ClientModels(self, provider, image_provider) self.images: Images = Images(self, image_provider) self.media: Images = self.images @@ -558,7 +558,7 @@ class AsyncClient(BaseClient): self.chat: AsyncChat = AsyncChat(self, provider) if image_provider is None: image_provider = provider - self.models: MediaModels = MediaModels(self, image_provider) + self.models: ClientModels = ClientModels(self, provider, image_provider) self.images: AsyncImages = AsyncImages(self, image_provider) self.media: AsyncImages = self.images diff --git a/g4f/client/image_models.py b/g4f/client/image_models.py deleted file mode 100644 index 7146f5f2..00000000 --- a/g4f/client/image_models.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import annotations - -from ..models import ModelUtils, ImageModel -from ..Provider import ProviderUtils -from ..providers.types import ProviderType - -class MediaModels(): - def __init__(self, client, provider: ProviderType = None): - self.client = client - 
self.provider = provider - - def get(self, name, default=None) -> ProviderType: - if name in ModelUtils.convert: - return ModelUtils.convert[name].best_provider - if name in ProviderUtils.convert: - return ProviderUtils.convert[name] - return default - - def get_all(self, api_key: str = None, **kwargs) -> list[str]: - if self.provider is None: - return [] - if api_key is None: - api_key = self.client.api_key - return self.provider.get_models( - **kwargs, - **{} if api_key is None else {"api_key": api_key} - ) - - def get_image(self, **kwargs) -> list[str]: - if self.provider is None: - return [model_id for model_id, model in ModelUtils.convert.items() if isinstance(model, ImageModel)] - self.get_all(**kwargs) - if hasattr(self.provider, "image_models"): - return self.provider.image_models - return [] - - def get_video(self, **kwargs) -> list[str]: - if self.provider is None: - return [] - self.get_all(**kwargs) - if hasattr(self.provider, "video_models"): - return self.provider.video_models - return [] \ No newline at end of file diff --git a/g4f/client/models.py b/g4f/client/models.py new file mode 100644 index 00000000..a47b3e59 --- /dev/null +++ b/g4f/client/models.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from ..models import ModelUtils, ImageModel, VisionModel +from ..Provider import ProviderUtils +from ..providers.types import ProviderType + +class ClientModels(): + def __init__(self, client, provider: ProviderType = None, media_provider: ProviderType = None): + self.client = client + self.provider = provider + self.media_provider = media_provider + + def get(self, name, default=None) -> ProviderType: + if name in ModelUtils.convert: + return ModelUtils.convert[name].best_provider + if name in ProviderUtils.convert: + return ProviderUtils.convert[name] + return default + + def get_all(self, api_key: str = None, **kwargs) -> list[str]: + if self.provider is None: + return [] + if api_key is None: + api_key = self.client.api_key + return 
self.provider.get_models( + **kwargs, + **{} if api_key is None else {"api_key": api_key} + ) + + def get_vision(self, **kwargs) -> list[str]: + if self.provider is None: + return [model_id for model_id, model in ModelUtils.convert.items() if isinstance(model, VisionModel)] + self.get_all(**kwargs) + if hasattr(self.provider, "vision_models"): + return self.provider.vision_models + return [] + + def get_media(self, api_key: str = None, **kwargs) -> list[str]: + if self.media_provider is None: + return [] + if api_key is None: + api_key = self.client.api_key + return self.media_provider.get_models( + **kwargs, + **{} if api_key is None else {"api_key": api_key} + ) + + def get_image(self, **kwargs) -> list[str]: + if self.media_provider is None: + return [model_id for model_id, model in ModelUtils.convert.items() if isinstance(model, ImageModel)] + self.get_media(**kwargs) + if hasattr(self.media_provider, "image_models"): + return self.media_provider.image_models + return [] + + def get_video(self, **kwargs) -> list[str]: + if self.media_provider is None: + return [] + self.get_media(**kwargs) + if hasattr(self.media_provider, "video_models"): + return self.media_provider.video_models + return [] \ No newline at end of file diff --git a/g4f/gui/client/background.html b/g4f/gui/client/background.html index 951ca779..6fdb9080 100644 --- a/g4f/gui/client/background.html +++ b/g4f/gui/client/background.html @@ -89,7 +89,7 @@ - +
@@ -105,6 +105,7 @@ let skipImage = 0; let errorVideo = 0; let errorImage = 0; + let skipRefresh = 0; videoFeed.onloadeddata = () => { videoFeed.classList.remove("hidden"); gradient.classList.add("hidden"); @@ -116,15 +117,15 @@ gradient.classList.remove("hidden"); return; } - videoFeed.src = "/search/video?skip=" + skipVideo; + videoFeed.src = "/search/video+g4f?skip=" + skipVideo; skipVideo++; }; videoFeed.onended = () => { - videoFeed.src = "/search/video?skip=" + skipVideo; + videoFeed.src = "/search/video+g4f?skip=" + skipVideo; skipVideo++; }; videoFeed.onclick = () => { - videoFeed.src = "/search/video?skip=" + skipVideo; + videoFeed.src = "/search/video+g4f?skip=" + skipVideo; skipVideo++; }; function initES() { @@ -173,11 +174,15 @@ skipImage++; return; } + if (skipRefresh) { + skipRefresh = 0; + return; + } if (images.length > 0) { imageFeed.classList.remove("hidden"); imageFeed.src = images.shift(); gradient.classList.add("hidden"); - } else if(imageFeed) { + } else { initES(); } }, 7000); @@ -192,6 +197,7 @@ }; imageFeed.onclick = () => { imageFeed.src = "/search/image?random=" + Math.random(); + skipRefresh = 1; }; })(); diff --git a/g4f/gui/client/demo.html b/g4f/gui/client/demo.html index 6aa0feeb..6d2695b6 100644 --- a/g4f/gui/client/demo.html +++ b/g4f/gui/client/demo.html @@ -81,16 +81,19 @@ border: none; } - #background, #image-feed { + #background { height: 100%; position: absolute; - z-index: -1; object-fit: cover; object-position: center; width: 100%; background: black; } + .container * { + z-index: 2; + } + .description, form p a { font-size: 1.2rem; margin-bottom: 30px; @@ -176,9 +179,6 @@ - -
- diff --git a/g4f/gui/client/home.html b/g4f/gui/client/home.html index f4016949..4d5f14ce 100644 --- a/g4f/gui/client/home.html +++ b/g4f/gui/client/home.html @@ -48,7 +48,6 @@ align-items: center; height: 100%; text-align: center; - z-index: 1; } header { @@ -67,7 +66,11 @@ #background { height: 100%; position: absolute; - z-index: -1; + top: 0; + } + + .container * { + z-index: 2; } .stream-widget { diff --git a/g4f/gui/client/index.html b/g4f/gui/client/index.html index 30e696cd..527464da 100644 --- a/g4f/gui/client/index.html +++ b/g4f/gui/client/index.html @@ -270,7 +270,7 @@