mirror of
https://github.com/xtekky/gpt4free.git
synced 2025-10-24 16:50:22 +08:00
refactor: improve media rendering and response formatting with precise changes
- Modified g4f/providers/response.py to ensure format_images_markdown returns the result directly without additional flags in the 'format_images_markdown' function.
- Updated g4f/gui/server/api.py to add 'tempfiles' parameter with default empty list to '_create_response_stream' method.
- Added cleanup to the API response handling in '_create_response_stream': once the response has completed, iterate over 'tempfiles' and attempt to remove each file, logging any failure with logger.exception inside a try-except block.
- Adjusted g4f/tools/files.py to fix tempfile creation: 'get_tempfile' now passes the given 'suffix' through directly instead of splitting it.
- In g4f/tools/media.py, changed 'render_part' function to handle 'text' key properly, checking 'part.get("text")' and returning a dictionary with 'type': 'text' and 'text': value, if present.
This commit is contained in:
@@ -6,6 +6,7 @@ import asyncio
|
|||||||
|
|
||||||
from ..typing import AsyncResult, Messages
|
from ..typing import AsyncResult, Messages
|
||||||
from ..providers.response import ImageResponse
|
from ..providers.response import ImageResponse
|
||||||
|
from ..image import use_aspect_ratio
|
||||||
from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
|
from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
|
||||||
|
|
||||||
|
|
||||||
@@ -32,10 +33,18 @@ class ImageLabs(AsyncGeneratorProvider, ProviderModelMixin):
|
|||||||
# Image
|
# Image
|
||||||
prompt: str = None,
|
prompt: str = None,
|
||||||
negative_prompt: str = "",
|
negative_prompt: str = "",
|
||||||
width: int = 1152,
|
aspect_ratio: str = "1:1",
|
||||||
height: int = 896,
|
width: int = None,
|
||||||
|
height: int = None,
|
||||||
|
extra_body: dict = {},
|
||||||
**kwargs
|
**kwargs
|
||||||
) -> AsyncResult:
|
) -> AsyncResult:
|
||||||
|
extra_body = use_aspect_ratio({
|
||||||
|
"width": width,
|
||||||
|
"height": height,
|
||||||
|
**extra_body
|
||||||
|
}, aspect_ratio)
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'accept': '*/*',
|
'accept': '*/*',
|
||||||
'accept-language': 'en-US,en;q=0.9',
|
'accept-language': 'en-US,en;q=0.9',
|
||||||
@@ -56,13 +65,12 @@ class ImageLabs(AsyncGeneratorProvider, ProviderModelMixin):
|
|||||||
"seed": str(int(time.time())),
|
"seed": str(int(time.time())),
|
||||||
"subseed": str(int(time.time() * 1000)),
|
"subseed": str(int(time.time() * 1000)),
|
||||||
"attention": 0,
|
"attention": 0,
|
||||||
"width": width,
|
|
||||||
"height": height,
|
|
||||||
"tiling": False,
|
"tiling": False,
|
||||||
"negative_prompt": negative_prompt,
|
"negative_prompt": negative_prompt,
|
||||||
"reference_image": "",
|
"reference_image": "",
|
||||||
"reference_image_type": None,
|
"reference_image_type": None,
|
||||||
"reference_strength": 30
|
"reference_strength": 30,
|
||||||
|
**extra_body
|
||||||
}
|
}
|
||||||
|
|
||||||
async with session.post(f'{cls.url}/txt2img', json=payload, proxy=proxy) as generate_response:
|
async with session.post(f'{cls.url}/txt2img', json=payload, proxy=proxy) as generate_response:
|
||||||
|
|||||||
@@ -359,18 +359,19 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin):
|
|||||||
return f"{url}&seed={seed}" if seed else url
|
return f"{url}&seed={seed}" if seed else url
|
||||||
async with ClientSession(headers=DEFAULT_HEADERS, connector=get_connector(proxy=proxy)) as session:
|
async with ClientSession(headers=DEFAULT_HEADERS, connector=get_connector(proxy=proxy)) as session:
|
||||||
responses = set()
|
responses = set()
|
||||||
|
responses.add(Reasoning(status=f"Generating {n} {'image' if n == 1 else 'images'}"))
|
||||||
finished = 0
|
finished = 0
|
||||||
|
start = time.time()
|
||||||
async def get_image(responses: set, i: int, seed: Optional[int] = None):
|
async def get_image(responses: set, i: int, seed: Optional[int] = None):
|
||||||
nonlocal finished
|
nonlocal finished
|
||||||
start = time.time()
|
|
||||||
async with session.get(get_image_url(i, seed), allow_redirects=False, headers={"referer": referrer}) as response:
|
async with session.get(get_image_url(i, seed), allow_redirects=False, headers={"referer": referrer}) as response:
|
||||||
try:
|
try:
|
||||||
await raise_for_status(response)
|
await raise_for_status(response)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
debug.error(f"Error fetching image: {e}")
|
debug.error(f"Error fetching image: {e}")
|
||||||
responses.add(Reasoning(status=f"Image #{i+1} generated in {time.time() - start:.2f}s"))
|
|
||||||
responses.add(ImageResponse(str(response.url), prompt))
|
responses.add(ImageResponse(str(response.url), prompt))
|
||||||
finished += 1
|
finished += 1
|
||||||
|
responses.add(Reasoning(status=f"Image {finished}/{n} generated in {time.time() - start:.2f}s"))
|
||||||
tasks = []
|
tasks = []
|
||||||
for i in range(int(n)):
|
for i in range(int(n)):
|
||||||
tasks.append(asyncio.create_task(get_image(responses, i, seed)))
|
tasks.append(asyncio.create_task(get_image(responses, i, seed)))
|
||||||
@@ -426,6 +427,8 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin):
|
|||||||
**extra_body
|
**extra_body
|
||||||
)
|
)
|
||||||
async with session.post(url, json=data, headers={"referer": referrer}) as response:
|
async with session.post(url, json=data, headers={"referer": referrer}) as response:
|
||||||
|
if response.status == 400:
|
||||||
|
debug.error(f"Error: 400 - Bad Request: {data}")
|
||||||
await raise_for_status(response)
|
await raise_for_status(response)
|
||||||
if response.headers["content-type"].startswith("text/plain"):
|
if response.headers["content-type"].startswith("text/plain"):
|
||||||
yield await response.text()
|
yield await response.text()
|
||||||
@@ -492,6 +495,6 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin):
|
|||||||
if finish_reason:
|
if finish_reason:
|
||||||
yield FinishReason(finish_reason)
|
yield FinishReason(finish_reason)
|
||||||
else:
|
else:
|
||||||
async for chunk in save_response_media(response, format_image_prompt(messages), [model, extra_parameters.get("audio", {}).get("voice")]):
|
async for chunk in save_response_media(response, format_image_prompt(messages), [model, extra_body.get("audio", {}).get("voice")]):
|
||||||
yield chunk
|
yield chunk
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import asyncio
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from markitdown import MarkItDown as MaItDo, StreamInfo
|
from ...integration.markitdown import MarkItDown as MaItDo, StreamInfo
|
||||||
has_markitdown = True
|
has_markitdown = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
has_markitdown = False
|
has_markitdown = False
|
||||||
|
|||||||
@@ -146,7 +146,7 @@ class Api:
|
|||||||
**kwargs
|
**kwargs
|
||||||
}
|
}
|
||||||
|
|
||||||
def _create_response_stream(self, kwargs: dict, provider: str, download_media: bool = True) -> Iterator:
|
def _create_response_stream(self, kwargs: dict, provider: str, download_media: bool = True, tempfiles: list[str] = []) -> Iterator:
|
||||||
def decorated_log(text: str, file = None):
|
def decorated_log(text: str, file = None):
|
||||||
debug.logs.append(text)
|
debug.logs.append(text)
|
||||||
if debug.logging:
|
if debug.logging:
|
||||||
@@ -163,7 +163,7 @@ class Api:
|
|||||||
has_images="media" in kwargs,
|
has_images="media" in kwargs,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
debug.error(e)
|
logger.exception(e)
|
||||||
yield self._format_json('error', type(e).__name__, message=get_error_message(e))
|
yield self._format_json('error', type(e).__name__, message=get_error_message(e))
|
||||||
return
|
return
|
||||||
if not isinstance(provider_handler, BaseRetryProvider):
|
if not isinstance(provider_handler, BaseRetryProvider):
|
||||||
@@ -198,7 +198,7 @@ class Api:
|
|||||||
tags = [model, kwargs.get("aspect_ratio"), kwargs.get("resolution"), kwargs.get("width"), kwargs.get("height")]
|
tags = [model, kwargs.get("aspect_ratio"), kwargs.get("resolution"), kwargs.get("width"), kwargs.get("height")]
|
||||||
media = asyncio.run(copy_media(chunk.get_list(), chunk.get("cookies"), chunk.get("headers"), proxy=proxy, alt=chunk.alt, tags=tags))
|
media = asyncio.run(copy_media(chunk.get_list(), chunk.get("cookies"), chunk.get("headers"), proxy=proxy, alt=chunk.alt, tags=tags))
|
||||||
media = ImageResponse(media, chunk.alt) if isinstance(chunk, ImageResponse) else VideoResponse(media, chunk.alt)
|
media = ImageResponse(media, chunk.alt) if isinstance(chunk, ImageResponse) else VideoResponse(media, chunk.alt)
|
||||||
yield self._format_json("content", str(media), urls=chunk.urls, alt=chunk.alt)
|
yield self._format_json("content", str(media), urls=media.urls, alt=media.alt)
|
||||||
elif isinstance(chunk, SynthesizeData):
|
elif isinstance(chunk, SynthesizeData):
|
||||||
yield self._format_json("synthesize", chunk.get_dict())
|
yield self._format_json("synthesize", chunk.get_dict())
|
||||||
elif isinstance(chunk, TitleGeneration):
|
elif isinstance(chunk, TitleGeneration):
|
||||||
@@ -232,6 +232,11 @@ class Api:
|
|||||||
yield self._format_json('error', type(e).__name__, message=get_error_message(e))
|
yield self._format_json('error', type(e).__name__, message=get_error_message(e))
|
||||||
finally:
|
finally:
|
||||||
yield from self._yield_logs()
|
yield from self._yield_logs()
|
||||||
|
for tempfile in tempfiles:
|
||||||
|
try:
|
||||||
|
os.remove(tempfile)
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(e)
|
||||||
|
|
||||||
def _yield_logs(self):
|
def _yield_logs(self):
|
||||||
if debug.logs:
|
if debug.logs:
|
||||||
@@ -252,8 +257,6 @@ class Api:
|
|||||||
}
|
}
|
||||||
|
|
||||||
def handle_provider(self, provider_handler, model):
|
def handle_provider(self, provider_handler, model):
|
||||||
if isinstance(provider_handler, BaseRetryProvider) and provider_handler.last_provider is not None:
|
|
||||||
provider_handler = provider_handler.last_provider
|
|
||||||
if model:
|
if model:
|
||||||
return self._format_json("provider", {**provider_handler.get_dict(), "model": model})
|
return self._format_json("provider", {**provider_handler.get_dict(), "model": model})
|
||||||
return self._format_json("provider", provider_handler.get_dict())
|
return self._format_json("provider", provider_handler.get_dict())
|
||||||
|
|||||||
@@ -8,8 +8,7 @@ import asyncio
|
|||||||
import shutil
|
import shutil
|
||||||
import random
|
import random
|
||||||
import datetime
|
import datetime
|
||||||
import tempfile
|
from flask import Flask, Response, redirect, request, jsonify, send_from_directory
|
||||||
from flask import Flask, Response, redirect, request, jsonify, render_template, send_from_directory
|
|
||||||
from werkzeug.exceptions import NotFound
|
from werkzeug.exceptions import NotFound
|
||||||
from typing import Generator
|
from typing import Generator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -17,19 +16,20 @@ from urllib.parse import quote_plus
|
|||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from markitdown import MarkItDown
|
from ...integration.markitdown import MarkItDown, StreamInfo
|
||||||
has_markitdown = True
|
has_markitdown = True
|
||||||
except ImportError:
|
except ImportError as e:
|
||||||
|
print(e)
|
||||||
has_markitdown = False
|
has_markitdown = False
|
||||||
|
|
||||||
from ...client.service import convert_to_provider
|
from ...client.service import convert_to_provider
|
||||||
from ...providers.asyncio import to_sync_generator
|
from ...providers.asyncio import to_sync_generator
|
||||||
from ...providers.response import FinishReason
|
from ...providers.response import FinishReason
|
||||||
from ...client.helper import filter_markdown
|
from ...client.helper import filter_markdown
|
||||||
from ...tools.files import supports_filename, get_streaming, get_bucket_dir, get_buckets
|
from ...tools.files import supports_filename, get_streaming, get_bucket_dir, get_tempfile
|
||||||
from ...tools.run_tools import iter_run_tools
|
from ...tools.run_tools import iter_run_tools
|
||||||
from ...errors import ProviderNotFoundError
|
from ...errors import ProviderNotFoundError
|
||||||
from ...image import is_allowed_extension
|
from ...image import is_allowed_extension, MEDIA_TYPE_MAP
|
||||||
from ...cookies import get_cookies_dir
|
from ...cookies import get_cookies_dir
|
||||||
from ...image.copy_images import secure_filename, get_source_url, get_media_dir
|
from ...image.copy_images import secure_filename, get_source_url, get_media_dir
|
||||||
from ... import ChatCompletion
|
from ... import ChatCompletion
|
||||||
@@ -79,9 +79,7 @@ class Backend_Api(Api):
|
|||||||
@app.route('/backend-api/v2/providers', methods=['GET'])
|
@app.route('/backend-api/v2/providers', methods=['GET'])
|
||||||
def jsonify_providers(**kwargs):
|
def jsonify_providers(**kwargs):
|
||||||
response = self.get_providers(**kwargs)
|
response = self.get_providers(**kwargs)
|
||||||
if isinstance(response, list):
|
|
||||||
return jsonify(response)
|
return jsonify(response)
|
||||||
return response
|
|
||||||
|
|
||||||
def get_demo_models():
|
def get_demo_models():
|
||||||
return [{
|
return [{
|
||||||
@@ -91,7 +89,7 @@ class Backend_Api(Api):
|
|||||||
"audio": isinstance(model, models.AudioModel),
|
"audio": isinstance(model, models.AudioModel),
|
||||||
"video": isinstance(model, models.VideoModel),
|
"video": isinstance(model, models.VideoModel),
|
||||||
"providers": [
|
"providers": [
|
||||||
getattr(provider, "parent", provider.__name__)
|
provider.get_parent()
|
||||||
for provider in providers
|
for provider in providers
|
||||||
],
|
],
|
||||||
"demo": True
|
"demo": True
|
||||||
@@ -109,13 +107,14 @@ class Backend_Api(Api):
|
|||||||
json_data = json.loads(request.form['json'])
|
json_data = json.loads(request.form['json'])
|
||||||
else:
|
else:
|
||||||
json_data = request.json
|
json_data = request.json
|
||||||
|
tempfiles = []
|
||||||
if "files" in request.files:
|
if "files" in request.files:
|
||||||
media = []
|
media = []
|
||||||
for file in request.files.getlist('files'):
|
for file in request.files.getlist('files'):
|
||||||
if file.filename != '' and is_allowed_extension(file.filename):
|
if file.filename != '' and is_allowed_extension(file.filename):
|
||||||
newfile = tempfile.TemporaryFile()
|
newfile = get_tempfile(file)
|
||||||
shutil.copyfileobj(file.stream, newfile)
|
tempfiles.append(newfile)
|
||||||
media.append((newfile, file.filename))
|
media.append((Path(newfile), file.filename))
|
||||||
json_data['media'] = media
|
json_data['media'] = media
|
||||||
|
|
||||||
if app.demo and not json_data.get("provider"):
|
if app.demo and not json_data.get("provider"):
|
||||||
@@ -130,6 +129,7 @@ class Backend_Api(Api):
|
|||||||
kwargs,
|
kwargs,
|
||||||
json_data.get("provider"),
|
json_data.get("provider"),
|
||||||
json_data.get("download_media", True),
|
json_data.get("download_media", True),
|
||||||
|
tempfiles
|
||||||
),
|
),
|
||||||
mimetype='text/event-stream'
|
mimetype='text/event-stream'
|
||||||
)
|
)
|
||||||
@@ -306,41 +306,46 @@ class Backend_Api(Api):
|
|||||||
filenames = []
|
filenames = []
|
||||||
media = []
|
media = []
|
||||||
for file in request.files.getlist('files'):
|
for file in request.files.getlist('files'):
|
||||||
# Copy the file to a temporary location
|
|
||||||
filename = secure_filename(file.filename)
|
filename = secure_filename(file.filename)
|
||||||
copyfile = tempfile.NamedTemporaryFile(suffix=filename, delete=False)
|
mimetype = file.mimetype.split(";")[0]
|
||||||
shutil.copyfileobj(file.stream, copyfile)
|
if (not filename or filename == "blob") and mimetype in MEDIA_TYPE_MAP:
|
||||||
copyfile.close()
|
filename = f"file.{MEDIA_TYPE_MAP[mimetype]}"
|
||||||
file.stream.close()
|
suffix = os.path.splitext(filename)[1].lower()
|
||||||
|
copyfile = get_tempfile(file, suffix)
|
||||||
result = None
|
result = None
|
||||||
if has_markitdown:
|
if has_markitdown:
|
||||||
try:
|
try:
|
||||||
|
language = request.headers.get("x-recognition-language")
|
||||||
md = MarkItDown()
|
md = MarkItDown()
|
||||||
result = md.convert(copyfile.name).text_content
|
result = md.convert(copyfile, stream_info=StreamInfo(
|
||||||
|
extension=suffix,
|
||||||
|
mimetype=file.mimetype,
|
||||||
|
), language=language).text_content
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(e)
|
||||||
|
is_media = is_allowed_extension(filename)
|
||||||
|
is_supported = supports_filename(filename)
|
||||||
|
if not is_media and not is_supported:
|
||||||
|
os.remove(copyfile)
|
||||||
|
continue
|
||||||
|
if not is_media and result:
|
||||||
with open(os.path.join(bucket_dir, f"{filename}.md"), 'w') as f:
|
with open(os.path.join(bucket_dir, f"{filename}.md"), 'w') as f:
|
||||||
f.write(f"{result}\n")
|
f.write(f"{result}\n")
|
||||||
filenames.append(f"{filename}.md")
|
filenames.append(f"{filename}.md")
|
||||||
except Exception as e:
|
if is_media:
|
||||||
logger.exception(e)
|
|
||||||
if not result:
|
|
||||||
if is_allowed_extension(filename):
|
|
||||||
os.makedirs(media_dir, exist_ok=True)
|
os.makedirs(media_dir, exist_ok=True)
|
||||||
newfile = os.path.join(media_dir, filename)
|
newfile = os.path.join(media_dir, filename)
|
||||||
media.append(filename)
|
media.append({"name": filename, "text": result})
|
||||||
elif supports_filename(filename):
|
elif not result and supports_filename(filename):
|
||||||
newfile = os.path.join(bucket_dir, filename)
|
newfile = os.path.join(bucket_dir, filename)
|
||||||
filenames.append(filename)
|
filenames.append(filename)
|
||||||
else:
|
|
||||||
os.remove(copyfile.name)
|
|
||||||
continue
|
|
||||||
try:
|
try:
|
||||||
os.rename(copyfile.name, newfile)
|
os.rename(copyfile, newfile)
|
||||||
except OSError:
|
except OSError:
|
||||||
shutil.copyfile(copyfile.name, newfile)
|
shutil.copyfile(copyfile, newfile)
|
||||||
os.remove(copyfile.name)
|
os.remove(copyfile)
|
||||||
with open(os.path.join(bucket_dir, "files.txt"), 'w') as f:
|
with open(os.path.join(bucket_dir, "files.txt"), 'w') as f:
|
||||||
[f.write(f"{filename}\n") for filename in filenames]
|
f.write("".join([f"{filename}\n" for filename in filenames]))
|
||||||
return {"bucket_id": bucket_id, "files": filenames, "media": media}
|
return {"bucket_id": bucket_id, "files": filenames, "media": media}
|
||||||
|
|
||||||
@app.route('/files/<bucket_id>/media/<filename>', methods=['GET'])
|
@app.route('/files/<bucket_id>/media/<filename>', methods=['GET'])
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ EXTENSIONS_MAP: dict[str, str] = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
MEDIA_TYPE_MAP: dict[str, str] = {value: key for key, value in EXTENSIONS_MAP.items()}
|
MEDIA_TYPE_MAP: dict[str, str] = {value: key for key, value in EXTENSIONS_MAP.items()}
|
||||||
|
MEDIA_TYPE_MAP["audio/webm"] = "webm"
|
||||||
|
|
||||||
def to_image(image: ImageType, is_svg: bool = False) -> Image:
|
def to_image(image: ImageType, is_svg: bool = False) -> Image:
|
||||||
"""
|
"""
|
||||||
@@ -111,7 +112,7 @@ def is_data_an_audio(data_uri: str = None, filename: str = None) -> str:
|
|||||||
extension = get_extension(filename)
|
extension = get_extension(filename)
|
||||||
if extension is not None:
|
if extension is not None:
|
||||||
media_type = EXTENSIONS_MAP[extension]
|
media_type = EXTENSIONS_MAP[extension]
|
||||||
if media_type.startswith("audio/"):
|
if media_type.startswith("audio/") or media_type == "video/webm":
|
||||||
return media_type
|
return media_type
|
||||||
if isinstance(data_uri, str):
|
if isinstance(data_uri, str):
|
||||||
audio_format = re.match(r'^data:(audio/\w+);base64,', data_uri)
|
audio_format = re.match(r'^data:(audio/\w+);base64,', data_uri)
|
||||||
|
|||||||
120
g4f/integration/markitdown/__init__.py
Normal file
120
g4f/integration/markitdown/__init__.py
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from typing import List, Union, BinaryIO
|
||||||
|
from markitdown import MarkItDown as BaseMarkItDown
|
||||||
|
from markitdown._stream_info import StreamInfo
|
||||||
|
from markitdown._base_converter import DocumentConverterResult
|
||||||
|
|
||||||
|
from markitdown._exceptions import (
|
||||||
|
FileConversionException,
|
||||||
|
UnsupportedFormatException,
|
||||||
|
FailedConversionAttempt,
|
||||||
|
)
|
||||||
|
|
||||||
|
from ._audio_converter import AudioConverter
|
||||||
|
from ._image_converter import ImageConverter
|
||||||
|
|
||||||
|
class MarkItDown(BaseMarkItDown):
    """(In preview) An extremely simple text-based document reader, suitable for LLM use.
    This reader will convert common file-types or webpages to Markdown.

    This subclass additionally registers ``AudioConverter`` and
    ``ImageConverter`` and only normalizes a result's ``text_content`` when it
    is a plain string.
    """

    def __init__(
        self,
        **kwargs,
    ):
        """Initialize the base reader, then register the audio and image converters.

        Args:
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)
        self.register_converter(AudioConverter())
        self.register_converter(ImageConverter())

    def _convert(
        self, *, file_stream: BinaryIO, stream_info_guesses: List[StreamInfo], **kwargs
    ) -> DocumentConverterResult:
        """Try every registered converter against every stream-info guess.

        Converters are tried in priority order for each guess (plus a final
        empty ``StreamInfo()``); the first converter that accepts the stream
        and returns a result wins.

        Args:
            file_stream: Binary stream to convert. Its position is restored
                after every conversion attempt.
            stream_info_guesses: Candidate descriptions of the stream.
            **kwargs: Extra options forwarded to ``accepts()`` and
                ``convert()`` of each converter. Caller-supplied values take
                precedence over the instance-level defaults.

        Returns:
            The first non-``None`` converter result. String ``text_content``
            is normalized (trailing whitespace stripped per line, runs of
            three or more newlines collapsed to two); any other
            ``text_content`` is returned untouched.

        Raises:
            FileConversionException: At least one converter accepted the
                stream but every attempt raised.
            UnsupportedFormatException: No converter attempted a conversion.
        """
        res: Union[None, DocumentConverterResult] = None

        # Keep track of which converters throw exceptions
        failed_attempts: List[FailedConversionAttempt] = []

        # Create a copy of the page_converters list, sorted by priority.
        # We do this with each call to _convert because the priority of converters may change between calls.
        # The sort is guaranteed to be stable, so converters with the same priority will remain in the same order.
        sorted_registrations = sorted(self._converters, key=lambda x: x.priority)

        # Options shared by every converter call. They do not depend on the
        # guess or the converter, so build them once instead of per iteration.
        shared_kwargs = dict(kwargs)

        # Copy any additional global options
        if "llm_client" not in shared_kwargs and self._llm_client is not None:
            shared_kwargs["llm_client"] = self._llm_client

        if "llm_model" not in shared_kwargs and self._llm_model is not None:
            shared_kwargs["llm_model"] = self._llm_model

        if "style_map" not in shared_kwargs and self._style_map is not None:
            shared_kwargs["style_map"] = self._style_map

        if "exiftool_path" not in shared_kwargs and self._exiftool_path is not None:
            shared_kwargs["exiftool_path"] = self._exiftool_path

        # Add the list of converters for nested processing
        shared_kwargs["_parent_converters"] = self._converters

        # Remember the initial stream position so that we can return to it
        cur_pos = file_stream.tell()

        for stream_info in stream_info_guesses + [StreamInfo()]:
            # Fresh copy per guess so legacy keys from one guess never leak
            # into the next one.
            _kwargs = dict(shared_kwargs)

            # Add legacy kwargs
            if stream_info is not None:
                if stream_info.extension is not None:
                    _kwargs["file_extension"] = stream_info.extension

                if stream_info.url is not None:
                    _kwargs["url"] = stream_info.url

            for converter_registration in sorted_registrations:
                converter = converter_registration.converter

                # Sanity check -- make sure the cur_pos is still the same
                assert (
                    cur_pos == file_stream.tell()
                ), "File stream position should NOT change between guess iterations"

                # Check if the converter will accept the file, and if so, try to convert it
                _accepts = False
                try:
                    _accepts = converter.accepts(file_stream, stream_info, **_kwargs)
                except NotImplementedError:
                    pass

                # accept() should not have changed the file stream position
                assert (
                    cur_pos == file_stream.tell()
                ), f"{type(converter).__name__}.accept() should NOT change the file_stream position"

                # Attempt the conversion
                if _accepts:
                    try:
                        res = converter.convert(file_stream, stream_info, **_kwargs)
                    except Exception:
                        failed_attempts.append(
                            FailedConversionAttempt(
                                converter=converter, exc_info=sys.exc_info()
                            )
                        )
                    finally:
                        file_stream.seek(cur_pos)

                if res is not None:
                    # Only plain strings can be normalized; a result whose
                    # text_content is something else is handed back as-is.
                    if isinstance(res.text_content, str):
                        stripped_lines = [
                            line.rstrip()
                            for line in re.split(r"\r?\n", res.text_content)
                        ]
                        res.text_content = re.sub(
                            r"\n{3,}", "\n\n", "\n".join(stripped_lines)
                        )
                    return res

        # If we got this far without success, report any exceptions
        if failed_attempts:
            raise FileConversionException(attempts=failed_attempts)

        # Nothing can handle it!
        raise UnsupportedFormatException(
            "Could not convert stream to Markdown. No converter attempted a conversion, suggesting that the filetype is simply not supported."
        )
|
||||||
105
g4f/integration/markitdown/_audio_converter.py
Normal file
105
g4f/integration/markitdown/_audio_converter.py
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
from typing import Any, BinaryIO
|
||||||
|
|
||||||
|
from markitdown.converters._exiftool import exiftool_metadata
|
||||||
|
from markitdown._base_converter import DocumentConverter, DocumentConverterResult
|
||||||
|
from markitdown._stream_info import StreamInfo
|
||||||
|
from markitdown._exceptions import MissingDependencyException
|
||||||
|
|
||||||
|
from ._transcribe_audio import transcribe_audio
|
||||||
|
|
||||||
|
# MIME type prefixes that AudioConverter claims.
ACCEPTED_MIME_TYPE_PREFIXES = [
    "audio/x-wav",
    "audio/mpeg",
    "video/mp4",
    "video/webm",
    "audio/webm",
]

# File extensions (lower-case, with leading dot) that AudioConverter claims.
ACCEPTED_FILE_EXTENSIONS = [
    ".wav",
    ".mp3",
    ".m4a",
    ".mp4",
    ".webm",
]


class AudioConverter(DocumentConverter):
    """
    Converts audio files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` is installed).
    """

    def accepts(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,  # Options to pass to the converter
    ) -> bool:
        """Return True when the extension or MIME type marks a supported audio stream.

        Matching is case-insensitive; MIME types are matched by prefix.
        """
        mimetype = (stream_info.mimetype or "").lower()
        extension = (stream_info.extension or "").lower()

        if extension in ACCEPTED_FILE_EXTENSIONS:
            return True

        # str.startswith accepts a tuple, so one call covers every prefix.
        return mimetype.startswith(tuple(ACCEPTED_MIME_TYPE_PREFIXES))

    def convert(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        language: str = "en-US",
        **kwargs: Any,  # Options to pass to the converter
    ) -> DocumentConverterResult:
        """Transcribe the audio stream, falling back to its metadata.

        Args:
            file_stream: Binary audio stream.
            stream_info: Extension / MIME type used to pick the audio format.
            language: Recognition language passed to ``transcribe_audio``.
                An empty or ``None`` value falls back to ``"en-US"``.
            **kwargs: Extra options; only ``exiftool_path`` is read here.

        Returns:
            A result whose markdown is the stripped transcript when one is
            produced, otherwise the stripped metadata lines.
        """
        # A caller forwarding an unset option passes language=None explicitly,
        # which bypasses the signature default; restore it here.
        if not language:
            language = "en-US"

        # Add metadata
        metadata = exiftool_metadata(
            file_stream, exiftool_path=kwargs.get("exiftool_path")
        )
        metadata_fields = [
            "Title",
            "Artist",
            "Author",
            "Band",
            "Album",
            "Genre",
            "Track",
            "DateTimeOriginal",
            "CreateDate",
            # "Duration", -- Wrong values when read from memory
            "NumChannels",
            "SampleRate",
            "AvgBytesPerSec",
            "BitsPerSample",
        ]
        md_content = ""
        if metadata:
            md_content = "".join(
                f"{field}: {metadata[field]}\n"
                for field in metadata_fields
                if field in metadata
            )

        # Figure out the audio format for transcription. Normalize exactly
        # like accepts() does (lower-case, MIME prefix match) so that every
        # stream accepts() claimed also resolves to a format here.
        mimetype = (stream_info.mimetype or "").lower()
        extension = (stream_info.extension or "").lower()
        if extension == ".wav" or mimetype.startswith("audio/x-wav"):
            audio_format = "wav"
        elif extension == ".mp3" or mimetype.startswith("audio/mpeg"):
            audio_format = "mp3"
        elif extension in (".mp4", ".m4a") or mimetype.startswith("video/mp4"):
            audio_format = "mp4"
        elif extension == ".webm" or mimetype.startswith(
            ("audio/webm", "video/webm")
        ):
            audio_format = "webm"
        else:
            audio_format = None

        # Transcribe
        if audio_format:
            try:
                # NOTE(review): this assigns rather than appends, so the
                # transcript replaces the metadata lines gathered above.
                # Kept unchanged; confirm that callers want the bare transcript.
                md_content = transcribe_audio(
                    file_stream, audio_format=audio_format, language=language
                )
            except MissingDependencyException:
                # Best effort: without the transcription dependency only the
                # metadata text (possibly empty) is returned.
                pass

        # Return the result
        return DocumentConverterResult(markdown=md_content.strip())
|
||||||
10
g4f/integration/markitdown/_base_converter.py
Normal file
10
g4f/integration/markitdown/_base_converter.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
from typing import Awaitable
|
||||||
|
|
||||||
|
class AsyncDocumentConverterResult:
    """Result of a document conversion whose text is delivered as an awaitable.

    The awaitable is kept on ``text_content``; this class never awaits it.
    """

    def __init__(self, text_content: Awaitable[str]):
        # Stored untouched: whoever receives the result resolves it.
        self.text_content = text_content
|
||||||
92
g4f/integration/markitdown/_image_converter.py
Normal file
92
g4f/integration/markitdown/_image_converter.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
from typing import BinaryIO, Any
|
||||||
|
import asyncio
|
||||||
|
from markitdown._base_converter import DocumentConverter, DocumentConverterResult
|
||||||
|
from markitdown._stream_info import StreamInfo
|
||||||
|
from markitdown.converters._llm_caption import llm_caption
|
||||||
|
from markitdown.converters._exiftool import exiftool_metadata
|
||||||
|
|
||||||
|
from ._base_converter import AsyncDocumentConverterResult
|
||||||
|
|
||||||
|
# MIME type prefixes that ImageConverter claims.
ACCEPTED_MIME_TYPE_PREFIXES = [
    "image/jpeg",
    "image/png",
]

# File extensions (lower-case, with leading dot) that ImageConverter claims.
ACCEPTED_FILE_EXTENSIONS = [".jpg", ".jpeg", ".png"]


class ImageConverter(DocumentConverter):
    """
    Converts images to markdown via extraction of metadata (if `exiftool` is installed), and description via a multimodal LLM (if an llm_client is configured).
    """

    def accepts(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,
    ) -> bool:
        """Return True when the extension or MIME type marks a supported image.

        Matching is case-insensitive; MIME types are matched by prefix.
        """
        mimetype = (stream_info.mimetype or "").lower()
        extension = (stream_info.extension or "").lower()

        if extension in ACCEPTED_FILE_EXTENSIONS:
            return True

        # str.startswith accepts a tuple, so one call covers every prefix.
        return mimetype.startswith(tuple(ACCEPTED_MIME_TYPE_PREFIXES))

    def convert(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,  # Options to pass to the converter
    ) -> DocumentConverterResult:
        """Describe an image through its metadata and, optionally, an LLM caption.

        Args:
            file_stream: Binary image stream.
            stream_info: Extension / MIME type of the stream.
            **kwargs: Options read here: ``exiftool_path``, ``llm_client``,
                ``llm_model`` and ``llm_prompt``.

        Returns:
            A ``DocumentConverterResult`` holding the metadata lines plus a
            ``# Description:`` section when a caption was obtained
            synchronously. When the client returns a coroutine, an
            ``AsyncDocumentConverterResult`` wrapping only the pending
            caption is returned instead (the metadata text is not included).
        """
        # The coroutine branch below needs the package's own caption helper,
        # which hands back an awaitable for async clients. Imported locally so
        # it takes precedence over the module-level `llm_caption` name.
        from ._llm_caption import llm_caption

        # Add metadata
        metadata = exiftool_metadata(
            file_stream, exiftool_path=kwargs.get("exiftool_path")
        )
        metadata_fields = [
            "ImageSize",
            "Title",
            "Caption",
            "Description",
            "Keywords",
            "Artist",
            "Author",
            "DateTimeOriginal",
            "CreateDate",
            "GPSPosition",
        ]
        md_content = ""
        if metadata:
            md_content = "".join(
                f"{field}: {metadata[field]}\n"
                for field in metadata_fields
                if field in metadata
            )

        # Try describing the image with GPT
        llm_client = kwargs.get("llm_client")
        llm_model = kwargs.get("llm_model")
        if llm_client is not None and llm_model is not None:
            llm_description = llm_caption(
                file_stream,
                stream_info,
                client=llm_client,
                model=llm_model,
                prompt=kwargs.get("llm_prompt"),
            )

            # Async client: hand the pending caption to the caller to await.
            if asyncio.iscoroutine(llm_description):
                return AsyncDocumentConverterResult(
                    llm_description,
                )

            if llm_description is not None:
                md_content += "\n# Description:\n" + llm_description.strip() + "\n"

        return DocumentConverterResult(
            markdown=md_content,
        )
|
||||||
56
g4f/integration/markitdown/_llm_caption.py
Normal file
56
g4f/integration/markitdown/_llm_caption.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
from typing import BinaryIO, Union, Awaitable
|
||||||
|
import base64
|
||||||
|
import mimetypes
|
||||||
|
import asyncio
|
||||||
|
from markitdown._stream_info import StreamInfo
|
||||||
|
|
||||||
|
|
||||||
|
def llm_caption(
    file_stream: BinaryIO, stream_info: "StreamInfo", *, client, model, prompt=None
) -> Union[None, str, Awaitable[str]]:
    """Ask a chat-completions style client to caption an image.

    The whole stream is read from its current position, base64-encoded and
    sent as a ``data:`` URI next to the text prompt. The stream position is
    restored afterwards, whether or not reading succeeded.

    Args:
        file_stream: Binary stream positioned at the start of the image data.
        stream_info: Object exposing ``mimetype`` and ``extension``; they are
            used, in that order, to determine the content type of the URI.
        client: Object exposing ``chat.completions.create(model=, messages=)``.
        model: Model identifier forwarded to the client.
        prompt: Instruction for the model; a default caption request is used
            when it is ``None`` or blank.

    Returns:
        ``None`` when the stream cannot be read, the caption text when the
        client answers synchronously, or a coroutine resolving to the caption
        text when the client's ``create`` call returns a coroutine.
    """
    if prompt is None or not prompt.strip():
        prompt = "Write a detailed caption for this image."

    # Get the content type: explicit mimetype, then a guess from the
    # extension, then a generic binary fallback.
    content_type = stream_info.mimetype
    if not content_type:
        content_type, _ = mimetypes.guess_type("_dummy" + (stream_info.extension or ""))
    if not content_type:
        content_type = "application/octet-stream"

    # Convert to base64. Captioning is best-effort, so a failed read yields
    # None instead of propagating; the position is always restored.
    cur_pos = file_stream.tell()
    try:
        base64_image = base64.b64encode(file_stream.read()).decode("utf-8")
    except Exception:
        return None
    finally:
        file_stream.seek(cur_pos)

    # Prepare the data-uri
    data_uri = f"data:{content_type};base64,{base64_image}"

    # Prepare the chat-completions request
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": data_uri,
                    },
                },
            ],
        }
    ]

    # Call the API; async clients hand back a coroutine that must be awaited
    # by the caller, so wrap it to resolve straight to the message text.
    response = client.chat.completions.create(model=model, messages=messages)
    if asyncio.iscoroutine(response):

        async def read_content(pending):
            completed = await pending
            return completed.choices[0].message.content

        return read_content(response)
    return response.choices[0].message.content
|
||||||
49
g4f/integration/markitdown/_transcribe_audio.py
Normal file
49
g4f/integration/markitdown/_transcribe_audio.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
import io
|
||||||
|
import sys
|
||||||
|
from typing import BinaryIO
|
||||||
|
from markitdown._exceptions import MissingDependencyException
|
||||||
|
|
||||||
|
# Try loading optional (but in this case, required) dependencies
|
||||||
|
# Save reporting of any exceptions for later
|
||||||
|
_dependency_exc_info = None
|
||||||
|
try:
|
||||||
|
# Suppress some warnings on library import
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
||||||
|
warnings.filterwarnings("ignore", category=SyntaxWarning)
|
||||||
|
import speech_recognition as sr
|
||||||
|
import pydub
|
||||||
|
except ImportError:
|
||||||
|
# Preserve the error and stack trace for later
|
||||||
|
_dependency_exc_info = sys.exc_info()
|
||||||
|
|
||||||
|
|
||||||
|
def transcribe_audio(file_stream: BinaryIO, *, audio_format: str = "wav", language: str = "en-US") -> str:
    """Transcribe speech from an audio stream and return the text.

    ``wav``, ``aiff`` and ``flac`` streams are handed to the recognizer as
    they are; ``mp3``, ``mp4`` and ``webm`` streams are first re-encoded to
    WAV in memory through ``pydub``.

    Args:
        file_stream: Binary stream containing the audio data.
        audio_format: Container/codec name of the data in ``file_stream``.
        language: Language tag forwarded to ``recognize_google``.

    Returns:
        The stripped transcript, or ``"[No speech detected]"`` when the
        transcript is empty.

    Raises:
        MissingDependencyException: If the optional audio packages failed to
            import when this module was loaded.
        ValueError: If ``audio_format`` is not one of the supported formats.
    """
    # Check for installed dependencies; re-surface the import error captured
    # at module load as the cause, with its original traceback.
    if _dependency_exc_info is not None:
        _, import_error, import_tb = _dependency_exc_info
        raise MissingDependencyException(
            "Speech transcription requires installing MarkItdown with the [audio-transcription] optional dependencies. E.g., `pip install markitdown[audio-transcription]` or `pip install markitdown[all]`"
        ) from import_error.with_traceback(  # type: ignore[union-attr]
            import_tb
        )

    if audio_format in ("mp3", "mp4", "webm"):
        # Re-encode to WAV in memory before recognition.
        segment = pydub.AudioSegment.from_file(file_stream, format=audio_format)
        wav_buffer = io.BytesIO()
        segment.export(wav_buffer, format="wav")
        wav_buffer.seek(0)
        audio_source = wav_buffer
    elif audio_format in ("wav", "aiff", "flac"):
        audio_source = file_stream
    else:
        raise ValueError(f"Unsupported audio format: {audio_format}")

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_source) as source:
        recording = recognizer.record(source)
        text = recognizer.recognize_google(recording, language=language).strip()
        return text or "[No speech detected]"
|
||||||
@@ -114,10 +114,7 @@ def format_images_markdown(images: Union[str, List[str]], alt: str,
|
|||||||
)
|
)
|
||||||
for idx, image in enumerate(images)
|
for idx, image in enumerate(images)
|
||||||
)
|
)
|
||||||
|
return result
|
||||||
start_flag = "<!-- generated images start -->\n"
|
|
||||||
end_flag = "<!-- generated images end -->\n"
|
|
||||||
return f"\n{start_flag}{result}\n{end_flag}\n"
|
|
||||||
|
|
||||||
class ResponseType:
|
class ResponseType:
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|||||||
@@ -586,7 +586,7 @@ async def get_async_streaming(bucket_dir: str, delete_files = False, refine_chun
|
|||||||
raise e
|
raise e
|
||||||
|
|
||||||
def get_tempfile(file, suffix):
|
def get_tempfile(file, suffix):
|
||||||
copyfile = tempfile.NamedTemporaryFile(suffix=os.path.splitext(suffix)[-1], delete=False)
|
copyfile = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
|
||||||
shutil.copyfileobj(file, copyfile)
|
shutil.copyfileobj(file, copyfile)
|
||||||
copyfile.close()
|
copyfile.close()
|
||||||
file.close()
|
file.close()
|
||||||
|
|||||||
@@ -24,6 +24,12 @@ def render_media(bucket_id: str, name: str, url: str, as_path: bool = False, as_
|
|||||||
def render_part(part: dict) -> dict:
|
def render_part(part: dict) -> dict:
|
||||||
if "type" in part:
|
if "type" in part:
|
||||||
return part
|
return part
|
||||||
|
text = part.get("text")
|
||||||
|
if text:
|
||||||
|
return {
|
||||||
|
"type": "text",
|
||||||
|
"text": text
|
||||||
|
}
|
||||||
filename = part.get("name")
|
filename = part.get("name")
|
||||||
if (filename is None):
|
if (filename is None):
|
||||||
bucket_dir = Path(get_bucket_dir(part.get("bucket_id")))
|
bucket_dir = Path(get_bucket_dir(part.get("bucket_id")))
|
||||||
|
|||||||
Reference in New Issue
Block a user