Mirror of https://github.com/xtekky/gpt4free.git, synced 2025-10-25 09:10:32 +08:00
Move file functions
g4f/files.py (new file, 26 lines added)
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+import re
+from urllib.parse import unquote
+import os
+
+from .cookies import get_cookies_dir
+
+def secure_filename(filename: str) -> str:
+    if filename is None:
+        return None
+    # Keep letters, numbers, basic punctuation and all Unicode chars
+    filename = re.sub(
+        r'[^\w.,_+-]+',
+        '_',
+        unquote(filename).strip(),
+        flags=re.UNICODE
+    )
+    encoding = 'utf-8'
+    max_length = 100
+    encoded = filename.encode(encoding)[:max_length]
+    decoded = encoded.decode(encoding, 'ignore')
+    return decoded.strip(".,_+-")
+
+def get_bucket_dir(*parts):
+    return os.path.join(get_cookies_dir(), "buckets", *[secure_filename(part) for part in parts if part])
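For orientation, a minimal usage sketch of the two helpers that now live in g4f/files.py; the filenames and bucket names below are illustrative, not taken from the repository:

    from g4f.files import secure_filename, get_bucket_dir

    # unquote() decodes percent-escapes, then every run of characters outside
    # letters/digits/"."/","/"_"/"+"/"-" collapses to a single "_"; the result is
    # truncated to 100 UTF-8 bytes and stripped of leading/trailing punctuation.
    print(secure_filename("report%202024 (draft).pdf"))  # -> report_2024_draft_.pdf

    # Each path part is sanitized the same way before being joined
    # under <cookies dir>/buckets.
    print(get_bucket_dir("chat 1", "uploads"))           # -> .../buckets/chat_1/uploads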
@@ -17,7 +17,7 @@ except ImportError:
 
 from ..typing import ImageType
 from ..errors import MissingRequirementsError
-from ..tools.files import get_bucket_dir
+from ..files import get_bucket_dir
 
 EXTENSIONS_MAP: dict[str, str] = {
     # Image
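The hunk above only changes where get_bucket_dir is imported from. A hypothetical consumer written against the new layout might look like this (the bucket name and the makedirs call are illustrative, not part of the commit):

    import os

    from g4f.files import get_bucket_dir  # previously: from g4f.tools.files import get_bucket_dir

    # Resolve the sanitized bucket directory and ensure it exists before writing into it.
    bucket_dir = get_bucket_dir("media-cache")
    os.makedirs(bucket_dir, exist_ok=True)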
@@ -7,7 +7,6 @@ from pathlib import Path
 from typing import Iterator, Optional, AsyncIterator
 from aiohttp import ClientSession, ClientError, ClientResponse, ClientTimeout
 import urllib.parse
-from urllib.parse import unquote
 import time
 import zipfile
 import asyncio
@@ -76,7 +75,7 @@ except ImportError:
     has_markitdown = False
 
 from .web_search import scrape_text
-from ..cookies import get_cookies_dir
+from ..files import secure_filename, get_bucket_dir
 from ..image import is_allowed_extension
 from ..requests.aiohttp import get_connector
 from ..providers.asyncio import to_sync_generator
@@ -88,22 +87,6 @@ PLAIN_CACHE = "plain.cache"
 DOWNLOADS_FILE = "downloads.json"
 FILE_LIST = "files.txt"
 
-def secure_filename(filename: str) -> str:
-    if filename is None:
-        return None
-    # Keep letters, numbers, basic punctuation and all Unicode chars
-    filename = re.sub(
-        r'[^\w.,_+-]+',
-        '_',
-        unquote(filename).strip(),
-        flags=re.UNICODE
-    )
-    encoding = 'utf-8'
-    max_length = 100
-    encoded = filename.encode(encoding)[:max_length]
-    decoded = encoded.decode(encoding, 'ignore')
-    return decoded.strip(".,_+-")
-
 def supports_filename(filename: str):
     if filename.endswith(".pdf"):
         if has_pypdf2:
@@ -139,16 +122,6 @@ def supports_filename(filename: str):
         return True
     return False
 
-def get_bucket_dir(*parts):
-    return os.path.join(get_cookies_dir(), "buckets", *[secure_filename(part) for part in parts if part])
-
-def get_buckets():
-    buckets_dir = os.path.join(get_cookies_dir(), "buckets")
-    try:
-        return [d for d in os.listdir(buckets_dir) if os.path.isdir(os.path.join(buckets_dir, d))]
-    except OSError:
-        return None
-
 def spacy_refine_chunks(source_iterator):
     if not has_spacy:
         raise MissingRequirementsError(f'Install "spacy" requirements | pip install -U g4f[files]')
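Since the edited module now imports secure_filename and get_bucket_dir from ..files instead of defining them, the names stay importable from their old location. A quick sanity check, assuming the module edited above is g4f.tools.files (which its relative imports suggest) and the hunks land as written:

    from g4f.files import secure_filename as moved
    from g4f.tools.files import secure_filename as reexported  # re-imported, not redefined

    # Both paths resolve to the same function object, so existing callers keep working.
    assert moved is reexported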