Mirror of https://github.com/xtekky/gpt4free.git, synced 2025-10-25 01:00:28 +08:00.
- Modified g4f/providers/response.py to ensure format_images_markdown returns the result directly without additional flags in the 'format_images_markdown' function.
- Updated g4f/gui/server/api.py to add 'tempfiles' parameter with default empty list to '_create_response_stream' method.
- Changed or added code in API response handling to iterate over 'tempfiles' and attempt to remove each file after response completion, with exception handling (try-except block with logger.exception).
- Adjusted g4f/tools/files.py to fix tempfile creation: corrected the 'suffix' parameter in 'get_tempfile' to use 'suffix' directly instead of splitting.
- In g4f/tools/media.py, changed 'render_part' function to handle 'text' key properly, checking 'part.get("text")' and returning a dictionary with 'type': 'text' and 'text': value, if present.
56 lines
1.7 KiB
Python
from typing import BinaryIO, Union, Awaitable
|
|
import base64
|
|
import mimetypes
|
|
import asyncio
|
|
from markitdown._stream_info import StreamInfo
|
|
|
|
|
|
def llm_caption(
    file_stream: BinaryIO, stream_info: "StreamInfo", *, client, model, prompt=None
) -> Union[None, str, Awaitable[str]]:
    """Caption an image by sending it to an OpenAI-compatible chat model.

    Args:
        file_stream: Seekable binary stream holding the image bytes. Its
            position is restored after reading.
        stream_info: Stream metadata; ``mimetype`` and ``extension`` are used
            to determine the content type for the data URI.
        client: OpenAI-compatible client exposing ``chat.completions.create``.
        model: Model identifier passed through to the API.
        prompt: Caption instruction; a default is substituted when ``None``
            or blank.

    Returns:
        The caption string, an awaitable resolving to it when the client is
        asynchronous, or ``None`` if the stream could not be read.
    """
    if prompt is None or prompt.strip() == "":
        prompt = "Write a detailed caption for this image."

    # Determine the content type: prefer the declared mimetype, then guess
    # from the file extension, and finally fall back to a generic binary type.
    content_type = stream_info.mimetype
    if not content_type:
        content_type, _ = mimetypes.guess_type("_dummy" + (stream_info.extension or ""))
    if not content_type:
        content_type = "application/octet-stream"

    # Read the whole stream and base64-encode it, always restoring the
    # original stream position. Reading is best-effort: any failure yields
    # None instead of raising.
    cur_pos = file_stream.tell()
    try:
        base64_image = base64.b64encode(file_stream.read()).decode("utf-8")
    except Exception:
        return None
    finally:
        file_stream.seek(cur_pos)

    # Embed the image inline as a data URI so no upload step is needed.
    data_uri = f"data:{content_type};base64,{base64_image}"

    # Build a single user message carrying the prompt plus the image.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": data_uri,
                    },
                },
            ],
        }
    ]

    # Call the API. Async clients return a coroutine; wrap it so the caller
    # receives an awaitable that yields just the caption text.
    response = client.chat.completions.create(model=model, messages=messages)
    if asyncio.iscoroutine(response):

        async def read_content(response):
            response = await response
            return response.choices[0].message.content

        return read_content(response)
    return response.choices[0].message.content