AI: Add photoprism-vision test service and caption API client #127 #1090

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer
2025-04-11 01:17:47 +02:00
parent 068ee52301
commit 6fcce84623
12 changed files with 233 additions and 6 deletions

View File

@@ -399,9 +399,9 @@ docker-build:
$(DOCKER_COMPOSE) build --pull
docker-nvidia: docker-nvidia-up
docker-nvidia-up:
docker compose -f compose.nvidia.yaml up
docker compose --profile=vision -f compose.nvidia.yaml up
docker-nvidia-build:
docker compose -f compose.nvidia.yaml up
docker compose --profile=vision -f compose.nvidia.yaml build
docker-intel: docker-intel-up
docker-intel-up:
docker compose -f compose.intel.yaml up

View File

@@ -22,6 +22,8 @@ services:
links:
- "traefik:localssl.dev"
- "traefik:app.localssl.dev"
- "traefik:vision.localssl.dev"
- "traefik:qdrant.localssl.dev"
- "traefik:keycloak.localssl.dev"
- "traefik:dummy-oidc.localssl.dev"
- "traefik:dummy-webdav.localssl.dev"
@@ -112,7 +114,7 @@ services:
TF_CPP_MIN_LOG_LEVEL: 0 # show TensorFlow log messages for development
## Nvidia Video Transcoding (https://docs.photoprism.app/getting-started/advanced/transcoding/#nvidia-container-toolkit):
NVIDIA_VISIBLE_DEVICES: "all"
NVIDIA_DRIVER_CAPABILITIES: "compute,video,utility"
NVIDIA_DRIVER_CAPABILITIES: "all"
PHOTOPRISM_FFMPEG_ENCODER: "nvidia" # H.264/AVC encoder (software, intel, nvidia, apple, raspberry, or vaapi)
PHOTOPRISM_FFMPEG_SIZE: "1920" # video size limit in pixels (720-7680) (default: 3840)
PHOTOPRISM_FFMPEG_BITRATE: "50" # video bitrate limit in Mbit/s (default: 50)
@@ -144,7 +146,24 @@ services:
extends:
file: ./compose.yaml
service: mariadb
## NVIDIA override for the photoprism-vision service: inherits the base
## definition from compose.yaml and reserves one NVIDIA GPU device for it.
photoprism-vision:
## Only started when the "all" or "vision" profile is enabled.
profiles: ["all", "vision"]
environment:
TF_CPP_MIN_LOG_LEVEL: 2
## Expose all NVIDIA devices and driver capabilities to the container.
NVIDIA_VISIBLE_DEVICES: "all"
NVIDIA_DRIVER_CAPABILITIES: "all"
deploy:
resources:
reservations:
devices:
- driver: "nvidia"
count: 1
capabilities: [ gpu ]
## Base service definition this override extends.
extends:
file: ./compose.yaml
service: photoprism-vision
## Qdrant service inherited unchanged from compose.yaml;
## only started when the "all" or "vision" profile is enabled.
qdrant:
profiles: ["all", "vision"]
extends:
file: ./compose.yaml
service: qdrant

View File

@@ -25,6 +25,8 @@ services:
links:
- "traefik:localssl.dev"
- "traefik:app.localssl.dev"
- "traefik:vision.localssl.dev"
- "traefik:qdrant.localssl.dev"
- "traefik:keycloak.localssl.dev"
- "traefik:dummy-oidc.localssl.dev"
- "traefik:dummy-webdav.localssl.dev"
@@ -170,6 +172,11 @@ services:
## Web UI: https://qdrant.localssl.dev/dashboard
qdrant:
image: qdrant/qdrant:latest
profiles: ["all", "vision"]
links:
- "traefik:localssl.dev"
- "traefik:app.localssl.dev"
- "traefik:vision.localssl.dev"
labels:
- "traefik.enable=true"
- "traefik.http.services.qdrant.loadbalancer.server.port=6333"
@@ -188,6 +195,32 @@ services:
- ./.qdrant.yaml:/qdrant/config/production.yaml
- ./storage/qdrant:/qdrant/storage
## PhotoPrism® Computer Vision API
## See: https://github.com/photoprism/photoprism-vision
photoprism-vision:
  image: photoprism/vision:latest
  ## Only started when the "all" or "vision" profile is enabled.
  profiles: ["all", "vision"]
  stop_grace_period: 5s
  working_dir: "/app"
  links:
    - "traefik:localssl.dev"
    - "traefik:app.localssl.dev"
    - "traefik:qdrant.localssl.dev"
  labels:
    ## The router and service are named "vision" so that they do not clash with
    ## the "qdrant" router/service declared by the labels of the qdrant container.
    - "traefik.enable=true"
    - "traefik.http.services.vision.loadbalancer.server.port=5000"
    - "traefik.http.services.vision.loadbalancer.server.scheme=http"
    - "traefik.http.routers.vision.entrypoints=websecure"
    - "traefik.http.routers.vision.rule=Host(`vision.localssl.dev`)"
    - "traefik.http.routers.vision.priority=3"
    - "traefik.http.routers.vision.tls.domains[0].main=localssl.dev"
    - "traefik.http.routers.vision.tls.domains[0].sans=*.localssl.dev"
    - "traefik.http.routers.vision.tls=true"
  ## Port the vision API listens on inside the container network.
  expose:
    - 5000
  environment:
    TF_CPP_MIN_LOG_LEVEL: 2
## Traefik v3 (Reverse Proxy)
## includes "*.localssl.dev" SSL certificate for test environments
## Docs: https://doc.traefik.io/traefik/

View File

@@ -69,6 +69,10 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
client := http.Client{Timeout: ServiceTimeout}
req, reqErr := http.NewRequest(method, uri, bytes.NewReader(data))
// Add "application/json" content type header.
header.SetContentType(req, header.ContentTypeJson)
// Add an authentication header if an access token is configured.
if key != "" {
header.SetAuthorization(req, key)
}
@@ -91,6 +95,8 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
return apiResponse, apiErr
} else if apiErr = json.Unmarshal(apiJson, apiResponse); apiErr != nil {
return apiResponse, apiErr
} else if clientResp.StatusCode >= 300 {
log.Debugf("vision: %s (status code %d)", apiJson, clientResp.StatusCode)
}
return apiResponse, nil

View File

@@ -12,6 +12,7 @@ type Files = []string
// ApiRequest holds the fields of a vision API request. Each field can be bound
// from form data and is omitted from YAML and JSON output when empty.
type ApiRequest struct {
// Id identifies the request; Caption sets it to a random UUID.
Id string `form:"id" yaml:"Id,omitempty" json:"id,omitempty"`
// Model is the name of the model that should process the request.
Model string `form:"model" yaml:"Model,omitempty" json:"model,omitempty"`
// Url is the location of a single image; Caption sets it to a download URL
// for local files or to the validated http(s) URL for remote images.
Url string `form:"url" yaml:"Url,omitempty" json:"url,omitempty"`
// Images is a list of image strings (Files is a []string).
// NOTE(review): the expected format of each entry is not visible here — confirm with the callers.
Images Files `form:"images" yaml:"Images,omitempty" json:"images,omitempty"`
}

View File

@@ -0,0 +1,81 @@
package vision
import (
"errors"
"fmt"
"net/url"
"slices"
"github.com/photoprism/photoprism/internal/api/download"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/fs"
"github.com/photoprism/photoprism/pkg/media"
"github.com/photoprism/photoprism/pkg/media/http/scheme"
"github.com/photoprism/photoprism/pkg/rnd"
)
// Caption returns a generated caption for the specified image by sending a
// request to the endpoint of the configured caption model.
//
// Depending on src, imgName is interpreted as follows:
//   - media.SrcLocal: the name of an existing, non-empty local file, which is
//     registered for download so that the service can fetch it from a URL
//     below DownloadUrl.
//   - media.SrcRemote: an http or https URL that is passed on to the service.
//
// An error is returned if the vision service or the caption model is not
// configured, the model has no usable endpoint, the image reference is
// invalid, the API request fails, or the response does not contain a caption.
func Caption(imgName string, src media.Src) (result CaptionResult, err error) {
	// Return if the vision service has not been configured.
	if Config == nil {
		return result, errors.New("vision service is not configured")
	}

	// Return if no caption model has been configured.
	model := Config.Model(ModelTypeCaption)

	if model == nil {
		return result, errors.New("missing caption model")
	}

	// Captions require a remote service API, so a server endpoint must be configured.
	uri, method := model.Endpoint()

	if uri == "" || method == "" {
		return result, errors.New("invalid caption model configuration")
	}

	// Determine the URL from which the service can fetch the image.
	var imgUrl string

	switch src {
	case media.SrcLocal:
		// Return if the file does not exist or is empty.
		if !fs.FileExistsNotEmpty(imgName) {
			return result, errors.New("invalid image file name")
		}

		// Register the local file so that it can be downloaded by the service.
		dlId, dlErr := download.Register(imgName)

		if dlErr != nil {
			return result, fmt.Errorf("%w (create download url)", dlErr)
		}

		imgUrl = fmt.Sprintf("%s/%s", DownloadUrl, dlId)
	case media.SrcRemote:
		// Only accept well-formed http and https URLs.
		u, urlErr := url.Parse(imgName)

		if urlErr != nil {
			return result, fmt.Errorf("%w (invalid image url)", urlErr)
		} else if !slices.Contains(scheme.HttpsHttp, u.Scheme) {
			return result, fmt.Errorf("unsupported image url scheme %s", clean.Log(u.Scheme))
		}

		imgUrl = u.String()
	default:
		return result, fmt.Errorf("unsupported media source type %s", clean.Log(src))
	}

	apiRequest := &ApiRequest{
		Id:    rnd.UUID(),
		Model: model.Name,
		Url:   imgUrl,
	}

	// Log the request payload for debugging; a marshalling error is ignored
	// here on purpose, since the payload is only used for the log message.
	if data, _ := apiRequest.MarshalJSON(); len(data) > 0 {
		log.Debugf("request: %s", data)
	}

	apiResponse, apiErr := PerformApiRequest(apiRequest, uri, method, model.EndpointKey())

	if apiErr != nil {
		return result, apiErr
	} else if apiResponse.Result.Caption == nil {
		return result, errors.New("invalid caption model response")
	}

	return *apiResponse.Result.Caption, nil
}

View File

@@ -0,0 +1,41 @@
package vision
import (
"net"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/pkg/media"
)
// TestCaption verifies caption generation against a running photoprism-vision
// service. It is skipped in short mode and when the service cannot be reached.
func TestCaption(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	// Probe the service port first, so that the test is skipped instead of
	// failing in environments where photoprism-vision is not available.
	conn, dialErr := net.DialTimeout("tcp", "photoprism-vision:5000", 10*time.Second)

	if dialErr != nil {
		t.Skip("skipping test because photoprism-vision is not running.")
	}

	// The connection is only needed for the probe; close it to avoid leaking a socket.
	_ = conn.Close()

	t.Run("Success", func(t *testing.T) {
		expectedText := "An image of sound waves"

		result, err := Caption("https://dl.photoprism.app/img/artwork/colorwaves-400.jpg", media.SrcRemote)

		assert.NoError(t, err)
		assert.IsType(t, CaptionResult{}, result)
		assert.LessOrEqual(t, float32(0.0), result.Confidence)

		t.Logf("caption: %#v", result)

		assert.Equal(t, expectedText, result.Text)
	})
	t.Run("Invalid", func(t *testing.T) {
		// An empty local file name must be rejected and yield an empty result.
		result, err := Caption("", media.SrcLocal)

		assert.Error(t, err)
		assert.IsType(t, CaptionResult{}, result)
		assert.Equal(t, "", result.Text)
		assert.Equal(t, float32(0.0), result.Confidence)
	})
}

View File

@@ -0,0 +1,38 @@
package vision
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/internal/ai/face"
"github.com/photoprism/photoprism/pkg/fs"
)
// TestFaceEmbeddings checks that FaceEmbeddings returns exactly one embedding
// for the sample face image and an error with no embeddings for empty input.
func TestFaceEmbeddings(t *testing.T) {
	t.Run("Success", func(t *testing.T) {
		// Load the sample image; the test cannot continue without it.
		imgBytes, readErr := os.ReadFile(fs.Abs("./testdata/face_160x160.jpg"))

		if readErr != nil {
			t.Fatal(readErr)
		}

		embeddings, err := FaceEmbeddings(imgBytes)

		assert.NoError(t, err)
		assert.IsType(t, face.Embeddings{}, embeddings)
		assert.Len(t, embeddings, 1)
	})
	t.Run("InvalidFile", func(t *testing.T) {
		// Empty image data must produce an error and no embeddings.
		embeddings, err := FaceEmbeddings([]byte{})

		assert.Error(t, err)
		assert.IsType(t, face.Embeddings{}, embeddings)
		assert.Empty(t, embeddings)
	})
}

View File

@@ -29,8 +29,8 @@ var (
}
CaptionModel = &Model{
Type: ModelTypeCaption,
Name: "Caption",
Uri: "http://photoprism-vision/api/v1/vision/describe",
Name: "kosmos-2",
Uri: "http://photoprism-vision:5000/api/v1/vision/caption",
Method: http.MethodPost,
Resolution: 720,
}

View File

@@ -67,7 +67,7 @@ func Authorization(c *gin.Context) (authType, authToken string) {
return "", ""
}
// SetAuthorization adds a bearer token authorization header to a request.
// SetAuthorization adds a bearer token authorization header to the given request.
func SetAuthorization(r *http.Request, authToken string) {
if authToken != "" {
r.Header.Add(Auth, fmt.Sprintf("%s %s", AuthBearer, authToken))

View File

@@ -117,3 +117,10 @@ func HasContentType(header *http.Header, contentType string) bool {
return false
}
// SetContentType adds a content type header with the given value to the
// request. Nothing is added if contentType is an empty string.
func SetContentType(r *http.Request, contentType string) {
	// Skip empty values so that no blank header is sent.
	if contentType == "" {
		return
	}

	r.Header.Add(ContentType, contentType)
}

View File

@@ -14,4 +14,5 @@ const (
// Lists of URL schemes that can be used for membership checks.
var (
// HttpsData contains the Https and Data schemes.
HttpsData = []string{Https, Data}
// HttpsHttp contains the Https and Http schemes.
HttpsHttp = []string{Https, Http}
)