Move file functions

This commit is contained in:
hlohaus
2025-06-24 22:32:59 +02:00
parent 744dfeb957
commit 5b4f98c069
3 changed files with 28 additions and 29 deletions

26
g4f/files.py Normal file
View File

@@ -0,0 +1,26 @@
from __future__ import annotations
import re
from urllib.parse import unquote
import os
from .cookies import get_cookies_dir
def secure_filename(filename: str) -> str:
if filename is None:
return None
# Keep letters, numbers, basic punctuation and all Unicode chars
filename = re.sub(
r'[^\w.,_+-]+',
'_',
unquote(filename).strip(),
flags=re.UNICODE
)
encoding = 'utf-8'
max_length = 100
encoded = filename.encode(encoding)[:max_length]
decoded = encoded.decode(encoding, 'ignore')
return decoded.strip(".,_+-")
def get_bucket_dir(*parts):
    """Build the path of a bucket directory under the cookies directory.

    Empty/falsy parts are skipped; each remaining part is passed through
    secure_filename before being joined onto ``<cookies_dir>/buckets``.
    """
    safe_parts = [secure_filename(part) for part in parts if part]
    return os.path.join(get_cookies_dir(), "buckets", *safe_parts)

View File

@@ -17,7 +17,7 @@ except ImportError:
from ..typing import ImageType from ..typing import ImageType
from ..errors import MissingRequirementsError from ..errors import MissingRequirementsError
from ..tools.files import get_bucket_dir from ..files import get_bucket_dir
EXTENSIONS_MAP: dict[str, str] = { EXTENSIONS_MAP: dict[str, str] = {
# Image # Image

View File

@@ -7,7 +7,6 @@ from pathlib import Path
from typing import Iterator, Optional, AsyncIterator from typing import Iterator, Optional, AsyncIterator
from aiohttp import ClientSession, ClientError, ClientResponse, ClientTimeout from aiohttp import ClientSession, ClientError, ClientResponse, ClientTimeout
import urllib.parse import urllib.parse
from urllib.parse import unquote
import time import time
import zipfile import zipfile
import asyncio import asyncio
@@ -76,7 +75,7 @@ except ImportError:
has_markitdown = False has_markitdown = False
from .web_search import scrape_text from .web_search import scrape_text
from ..cookies import get_cookies_dir from ..files import secure_filename, get_bucket_dir
from ..image import is_allowed_extension from ..image import is_allowed_extension
from ..requests.aiohttp import get_connector from ..requests.aiohttp import get_connector
from ..providers.asyncio import to_sync_generator from ..providers.asyncio import to_sync_generator
@@ -88,22 +87,6 @@ PLAIN_CACHE = "plain.cache"
DOWNLOADS_FILE = "downloads.json" DOWNLOADS_FILE = "downloads.json"
FILE_LIST = "files.txt" FILE_LIST = "files.txt"
def secure_filename(filename: str) -> str:
if filename is None:
return None
# Keep letters, numbers, basic punctuation and all Unicode chars
filename = re.sub(
r'[^\w.,_+-]+',
'_',
unquote(filename).strip(),
flags=re.UNICODE
)
encoding = 'utf-8'
max_length = 100
encoded = filename.encode(encoding)[:max_length]
decoded = encoded.decode(encoding, 'ignore')
return decoded.strip(".,_+-")
def supports_filename(filename: str): def supports_filename(filename: str):
if filename.endswith(".pdf"): if filename.endswith(".pdf"):
if has_pypdf2: if has_pypdf2:
@@ -139,16 +122,6 @@ def supports_filename(filename: str):
return True return True
return False return False
def get_bucket_dir(*parts):
return os.path.join(get_cookies_dir(), "buckets", *[secure_filename(part) for part in parts if part])
def get_buckets():
buckets_dir = os.path.join(get_cookies_dir(), "buckets")
try:
return [d for d in os.listdir(buckets_dir) if os.path.isdir(os.path.join(buckets_dir, d))]
except OSError:
return None
def spacy_refine_chunks(source_iterator): def spacy_refine_chunks(source_iterator):
if not has_spacy: if not has_spacy:
raise MissingRequirementsError(f'Install "spacy" requirements | pip install -U g4f[files]') raise MissingRequirementsError(f'Install "spacy" requirements | pip install -U g4f[files]')