Move file functions

This commit is contained in:
hlohaus
2025-06-24 22:32:59 +02:00
parent 744dfeb957
commit 5b4f98c069
3 changed files with 28 additions and 29 deletions

26
g4f/files.py Normal file
View File

@@ -0,0 +1,26 @@
from __future__ import annotations
import re
from urllib.parse import unquote
import os
from .cookies import get_cookies_dir
def secure_filename(filename: str, max_length: int = 100) -> str | None:
    """Turn an arbitrary, possibly percent-encoded name into a safe file name.

    The name is percent-decoded and stripped of surrounding whitespace. Every
    run of characters that is neither a word character (letters, digits and
    underscore, including non-ASCII ones) nor one of ``. , _ + -`` is then
    collapsed into a single underscore, so path separators never survive.
    The result is limited to ``max_length`` bytes of UTF-8, and leading and
    trailing punctuation (``. , _ + -``) is removed.

    Args:
        filename: The raw name. ``None`` is passed through unchanged.
        max_length: Maximum size of the result in UTF-8 encoded bytes.

    Returns:
        The sanitized name (possibly empty), or ``None`` if ``filename`` is
        ``None``.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if filename is None:
        return None
    if max_length < 0:
        # A negative bound would slice from the end instead of limiting size.
        raise ValueError("max_length must be non-negative")
    sanitized = re.sub(
        r'[^\w.,_+-]+',
        '_',
        unquote(filename).strip(),
        flags=re.UNICODE
    )
    encoding = 'utf-8'
    # Truncate on bytes, then drop any multi-byte character that was cut in
    # half by the slice ('ignore' discards the incomplete trailing sequence).
    truncated = sanitized.encode(encoding)[:max_length]
    return truncated.decode(encoding, 'ignore').strip(".,_+-")
def get_bucket_dir(*parts):
    """Build the path of a bucket directory below the cookies directory.

    Falsy parts (``None``, empty strings) are skipped; every remaining part
    is passed through ``secure_filename`` before being joined onto
    ``<get_cookies_dir()>/buckets``.
    """
    cookies_dir = get_cookies_dir()
    safe_parts = []
    for part in parts:
        if part:
            safe_parts.append(secure_filename(part))
    return os.path.join(cookies_dir, "buckets", *safe_parts)

View File

@@ -17,7 +17,7 @@ except ImportError:
from ..typing import ImageType
from ..errors import MissingRequirementsError
from ..tools.files import get_bucket_dir
from ..files import get_bucket_dir
EXTENSIONS_MAP: dict[str, str] = {
# Image

View File

@@ -7,7 +7,6 @@ from pathlib import Path
from typing import Iterator, Optional, AsyncIterator
from aiohttp import ClientSession, ClientError, ClientResponse, ClientTimeout
import urllib.parse
from urllib.parse import unquote
import time
import zipfile
import asyncio
@@ -76,7 +75,7 @@ except ImportError:
has_markitdown = False
from .web_search import scrape_text
from ..cookies import get_cookies_dir
from ..files import secure_filename, get_bucket_dir
from ..image import is_allowed_extension
from ..requests.aiohttp import get_connector
from ..providers.asyncio import to_sync_generator
@@ -88,22 +87,6 @@ PLAIN_CACHE = "plain.cache"
DOWNLOADS_FILE = "downloads.json"
FILE_LIST = "files.txt"
def secure_filename(filename: str) -> str:
    """Sanitize a possibly percent-encoded name for use as a file name.

    ``None`` is returned unchanged. Otherwise the name is percent-decoded and
    stripped of surrounding whitespace, disallowed character runs become a
    single underscore, the result is cut to 100 bytes of UTF-8, and leading
    and trailing ``. , _ + -`` characters are removed.
    """
    if filename is None:
        return None
    cleaned = unquote(filename).strip()
    # Keep word characters (including non-ASCII letters and digits) and a
    # small set of punctuation; everything else collapses to "_".
    cleaned = re.sub(r'[^\w.,_+-]+', '_', cleaned, flags=re.UNICODE)
    # Limit by encoded size; 'ignore' drops a character split by the cut.
    clipped = cleaned.encode('utf-8')[:100].decode('utf-8', 'ignore')
    return clipped.strip(".,_+-")
def supports_filename(filename: str):
if filename.endswith(".pdf"):
if has_pypdf2:
@@ -139,16 +122,6 @@ def supports_filename(filename: str):
return True
return False
def get_bucket_dir(*parts):
    """Return the bucket directory path for the given path components.

    Falsy components are dropped, the rest are sanitized with
    ``secure_filename`` and appended to ``<get_cookies_dir()>/buckets``.
    """
    base_dir = get_cookies_dir()
    # filter(None, ...) keeps only truthy parts, like an ``if part`` guard.
    sanitized = list(map(secure_filename, filter(None, parts)))
    return os.path.join(base_dir, "buckets", *sanitized)
def get_buckets():
    """List the names of all bucket directories.

    Returns:
        The names of the sub-directories of ``<get_cookies_dir()>/buckets``,
        or ``None`` when that directory cannot be listed (``OSError``).
    """
    root = os.path.join(get_cookies_dir(), "buckets")
    found = []
    try:
        for entry in os.listdir(root):
            # Only directories count as buckets; plain files are skipped.
            if os.path.isdir(os.path.join(root, entry)):
                found.append(entry)
    except OSError:
        return None
    return found
def spacy_refine_chunks(source_iterator):
if not has_spacy:
raise MissingRequirementsError(f'Install "spacy" requirements | pip install -U g4f[files]')