Setup: Update Ollama service examples in compose.alldbms.yaml file
@@ -273,21 +273,22 @@ services:
     environment:
       ## Ollama Configuration Options:
       OLLAMA_HOST: "0.0.0.0:11434"
       OLLAMA_MODELS: "/root/.ollama"        # model storage path (see volumes section below)
       OLLAMA_MAX_QUEUE: "100"               # maximum number of queued requests
       OLLAMA_NUM_PARALLEL: "1"              # maximum number of parallel requests
       OLLAMA_MAX_LOADED_MODELS: "1"         # maximum number of loaded models per GPU
       OLLAMA_LOAD_TIMEOUT: "5m"             # maximum time for loading models (default "5m")
-      OLLAMA_KEEP_ALIVE: "10m"              # duration that models stay loaded in memory (default "5m")
+      OLLAMA_KEEP_ALIVE: "5m"               # duration that models stay loaded in memory (default "5m")
       OLLAMA_CONTEXT_LENGTH: "4096"         # maximum input context length
-      OLLAMA_MULTIUSER_CACHE: "1"           # optimize prompt caching for multi-user scenarios
-      # OLLAMA_DEBUG: "1"                   # shows additional debug information
-      # OLLAMA_NOPRUNE: "1"                 # disables pruning of model blobs at startup
-      # OLLAMA_NOHISTORY: "1"               # disables readline history
-      # OLLAMA_FLASH_ATTENTION: "1"         # enables the experimental flash attention feature
-      # OLLAMA_SCHED_SPREAD: "1"            # allows scheduling models across all GPUs.
-      # OLLAMA_GPU_OVERHEAD: "0"            # reserves a portion of VRAM per GPU (bytes)
-      # OLLAMA_INTEL_GPU: "1"               # enables experimental Intel GPU detection
+      OLLAMA_MULTIUSER_CACHE: "false"       # optimize prompt caching for multi-user scenarios
+      OLLAMA_NOPRUNE: "false"               # disables pruning of model blobs at startup
+      OLLAMA_NOHISTORY: "true"              # disables readline history
+      OLLAMA_FLASH_ATTENTION: "false"       # enables the experimental flash attention feature
+      OLLAMA_KV_CACHE_TYPE: "f16"           # cache quantization (f16, q8_0, or q4_0)
+      OLLAMA_SCHED_SPREAD: "false"          # allows scheduling models across all GPUs.
+      OLLAMA_NEW_ENGINE: "true"             # enables the new Ollama engine
+      # OLLAMA_DEBUG: "true"                # shows additional debug information
+      # OLLAMA_INTEL_GPU: "true"            # enables experimental Intel GPU detection
       ## NVIDIA GPU Hardware Acceleration (optional):
       # NVIDIA_VISIBLE_DEVICES: "all"
       # NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
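For context, a minimal sketch of how these variables sit inside a complete ollama service definition. The image tag, port mapping, volume name, and GPU reservation below follow the common Ollama compose pattern and are assumptions, not taken from this diff:

services:
  ollama:
    image: ollama/ollama:latest        # assumed tag; see the full compose.alldbms.yaml for the exact image
    restart: unless-stopped
    ports:
      - "11434:11434"                  # exposes the API address set via OLLAMA_HOST above
    environment:
      OLLAMA_HOST: "0.0.0.0:11434"
      OLLAMA_MODELS: "/root/.ollama"   # must match the container path mounted below
    volumes:
      - "ollama:/root/.ollama"         # persists downloaded models across restarts
    ## NVIDIA GPU Hardware Acceleration (optional):
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]

volumes:
  ollama:

Other containers on the same compose network can reach the service at http://ollama:11434, and the effective settings can be confirmed with: docker compose exec ollama env | grep OLLAMA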