Updated framer with source license

Ray Cole
2025-05-30 08:38:29 -07:00
parent db54eab635
commit a23a124eee
39 changed files with 5643 additions and 23 deletions

2
.gitignore vendored
View File

@@ -26,3 +26,5 @@ go.work.sum
.idea
.vscode
gen/

66
Dockerfile Normal file
View File

@@ -0,0 +1,66 @@
FROM ubuntu:22.04
ENV GOLANG_VERSION=1.23.9
ENV GOLANG_CHECKSUM=de03e45d7a076c06baaa9618d42b3b6a0561125b87f6041c6397680a71e5bb26
ENV GOLANG_ARCH=linux-amd64
# ENV GOLANG_CHECKSUM=3dc4dd64bdb0275e3ec65a55ecfc2597009c7c46a1b256eefab2f2172a53a602
# ENV GOLANG_ARCH=linux-arm64
ENV PROTOC_VERSION=29.3
ENV PROTOC_CHECKSUM=3e866620c5be27664f3d2fa2d656b5f3e09b5152b42f1bedbf427b333e90021a
ENV PROTOC_ARCH=linux-x86_64
# ENV PROTOC_CHECKSUM=6427349140e01f06e049e707a58709a4f221ae73ab9a0425bc4a00c8d0e1ab32
# ENV PROTOC_ARCH=linux-aarch_64
RUN apt-get update && apt-get install -y \
wget \
build-essential \
pkg-config \
unzip \
ffmpeg \
libavcodec-dev \
libavdevice-dev \
libavfilter-dev \
libavformat-dev \
libavutil-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
# Install golang
RUN wget -q -O /tmp/go${GOLANG_VERSION}.${GOLANG_ARCH}.tar.gz https://go.dev/dl/go${GOLANG_VERSION}.${GOLANG_ARCH}.tar.gz \
&& echo "${GOLANG_CHECKSUM} /tmp/go${GOLANG_VERSION}.${GOLANG_ARCH}.tar.gz" | sha256sum -c - \
&& tar -C /usr/local -xzf /tmp/go${GOLANG_VERSION}.${GOLANG_ARCH}.tar.gz \
&& rm /tmp/go${GOLANG_VERSION}.${GOLANG_ARCH}.tar.gz
RUN wget -q -O /tmp/protoc-${PROTOC_VERSION}-${PROTOC_ARCH}.zip \
https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-${PROTOC_ARCH}.zip \
&& echo "${PROTOC_CHECKSUM} /tmp/protoc-${PROTOC_VERSION}-${PROTOC_ARCH}.zip" | sha256sum -c - \
&& unzip -d /usr/local /tmp/protoc-${PROTOC_VERSION}-${PROTOC_ARCH}.zip \
&& rm /tmp/protoc-${PROTOC_VERSION}-${PROTOC_ARCH}.zip
ENV PATH=${PATH}:/usr/local/go/bin
ENV GOOS=linux
ENV GOROOT=/usr/local/go
ENV GOBIN=/usr/local/go/bin
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest \
&& go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
WORKDIR /app
# Cache dependencies in a separate layer
ADD go.mod go.mod
ADD go.sum go.sum
RUN go mod download
# Build the application
ADD . ./
RUN make && mkdir /app/bin && cp /app/out/* /app/bin/
ENV PATH=${PATH}:/app/bin
WORKDIR /app
ENTRYPOINT ["/bin/bash", "-c"]
CMD ["/app/bin/framer-server"]

44
Makefile
View File

@@ -1,23 +1,21 @@
-.PHONY: all clean
-# Binaries
-BINARIES = framer-server
-# Go build command
-GO_BUILD = go build -o
-PROTOC_OPTS = --go_out=gen --go_opt=paths=source_relative --go-grpc_out=gen --go-grpc_opt=paths=source_relative
-all: $(BINARIES)
-protos: proto/fps/model/*.proto
-	mkdir -p gen/ && protoc --proto_path=proto $(PROTOC_OPTS) proto/fps/model/*.proto proto/fps/service/*.proto proto/fps/*.proto
-# go mod tidy
-$(BINARIES): protos
-	$(GO_BUILD) ./out/$@ ./cmd/$@/main.go
-clean:
-	rm -rf ./out/ ./gen/ $(BINARIES)
+.PHONY: all clean
+# Binaries
+BINARIES = framer-server
+# Go build command
+GO_BUILD = go build -o
+PROTOC_OPTS = --go_out=api/proto/gen/go --go_opt=paths=source_relative --go-grpc_out=api/proto/gen/go --go-grpc_opt=paths=source_relative
+all: $(BINARIES)
+protos: api/proto/fps/model/*.proto api/proto/fps/service/*.proto api/proto/fps/*.proto
+	mkdir -p api/proto/gen/go && protoc --proto_path=api/proto $(PROTOC_OPTS) api/proto/fps/model/*.proto api/proto/fps/service/*.proto api/proto/fps/*.proto
+$(BINARIES): protos
+	$(GO_BUILD) ./out/$@ ./cmd/$@/main.go
+clean:
+	rm -rf ./out/ ./gen/ $(BINARIES)

35
api/proto/fps/model/annotation.proto Normal file
View File

@@ -0,0 +1,35 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// annotation.proto describes an annotation on a frame of data.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "fps/model/geometry.proto";
// Annotation represents a single annotation against a single frame of data.
message Annotation {
oneof expr {
BoundingBox2D bounding_box_2d = 1;
Wireframe2D wireframe_2d = 2;
// Segment2D can be used to represent a polygon.
Segment2D segment_2d = 3;
string text = 4;
}
}

56
api/proto/fps/model/datalake.proto Normal file
View File

@@ -0,0 +1,56 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// datalake.proto describes a data lake service.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "fps/model/media.proto";
// The model-facing DataLake service doesn't use gRPC.
// Instead, we send protobuf-encoded requests and responses
// in lock-step over a unix domain socket.
// The socket uses SEQ_PACKET so we don't have to delimit messages.
// However, this means we need to agree a priori to a max message size.
// To that end, we'll say that neither side should send a message larger than 64 KB.
// If the server's response would exceed 64 KB, it may just close the connection.
// DataLakeUDSMediaRequest is sent by the model to request media objects.
// These objects may be the original media files or specific slices of them.
message DataLakeUDSMediaRequest {
MediaKey key = 1;
// signature is an opaque string that authenticates the url, if needed.
// Signature strings are NOT consistent across time or nodes, so they should
// not be stored in databases.
string signature = 2;
// meta_only indicates that the model only wants the metadata for the media
// object (i.e., the resulting Media.format is empty).
bool meta_only = 3;
}
// DataLakeUDSMediaResponse is sent by the data lake in response to a request.
// It contains the requested media object, which will be of format "MediaFD."
// The UDS message will include the file descriptor transfer.
message DataLakeUDSMediaResponse {
Media media = 1;
string error = 2;
}
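
The lock-step exchange described above is easy to exercise without gRPC. Below is a minimal client sketch in Go, assuming the generated package from the go_package option; the socket path is hypothetical, and meta_only is set so no file descriptor transfer is involved.

package main

import (
	"log"
	"net"

	"google.golang.org/protobuf/proto"

	model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)

const maxMsg = 64 * 1024 // Agreed a priori, per the comments above.

func main() {
	// SOCK_SEQPACKET preserves message boundaries, so no delimiters are needed.
	conn, err := net.Dial("unixpacket", "/run/datalake.sock") // hypothetical path
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	req := &model.DataLakeUDSMediaRequest{
		Key:      &model.MediaKey{Url: "dl://localhost/path/to/object.mp4"},
		MetaOnly: true, // Metadata only, so no file descriptor transfer is needed.
	}
	out, err := proto.Marshal(req)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := conn.Write(out); err != nil {
		log.Fatal(err)
	}

	// Lock-step: exactly one response per request, at most 64 KB.
	in := make([]byte, maxMsg)
	n, err := conn.Read(in)
	if err != nil {
		log.Fatal(err)
	}
	resp := &model.DataLakeUDSMediaResponse{}
	if err := proto.Unmarshal(in[:n], resp); err != nil {
		log.Fatal(err)
	}
	log.Printf("info=%v err=%q", resp.GetMedia().GetInfo(), resp.GetError())
}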

67
api/proto/fps/model/detection.proto Normal file
View File

@@ -0,0 +1,67 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// detection.proto describes messages that detectors emit.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "fps/model/annotation.proto";
import "fps/model/media.proto";
import "fps/model/model.proto";
// Detection represents a single detection in a piece of media. Detections are
// the top-level object returned from detectors.
message Detection {
string name = 1; // Name of the thing detected. Typically the class name.
string class_uuid = 2; // Class UUID of the thing detected. Only provided by automl models.
float score = 3; // [0.0, 1.0]
string tracking_id = 4; // Unique ID that tracks the detected entity across time.
// TagValue represents the value side of a tag map, e.g., for the tag
// key "age", the value might be "60" with a score of 0.7.
message TagValue {
string value = 1;
float score = 2; // [0.0, 1.0]
}
map<string, TagValue> tags = 5; // Tags associated with this detection, e.g., age=60 @90%.
// If no annotation is provided, the detection refers to the entire frame.
Annotation annotation = 10;
MediaKey media_key = 11; // Media in which this detection was made.
}
// DetectionSet represents a set of detections made by a single detector.
message DetectionSet {
ModelKey model_key = 1;
repeated Detection detections = 2;
}
// Datum represents a set of media as it transits the pipeline,
// accreting detections as it goes.
message Datum {
// Consumers of Datum might not need to process every media in this set.
// We pass a full Media object instead of just the MediaKey, so consumers
// can examine the MediaInfo and know if it's a supported modality.
// However, medias here may or may not have a format/payload. If the payload
// is desired, the consumer can request it from the datalake.
repeated Media medias = 1;
// detection_sets are the detections made by previous stages in the pipeline.
repeated DetectionSet detection_sets = 2;
}
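
As a concrete instance of the tags map described above, a sketch in Go (stub names assumed from the go_package option):

package detections

import model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"

// examplePerson builds a Detection with one tag, mirroring the
// "age=60 with a score of 0.7" example in the comments above.
func examplePerson() *model.Detection {
	return &model.Detection{
		Name:  "person",
		Score: 0.93,
		Tags: map[string]*model.Detection_TagValue{
			"age": {Value: "60", Score: 0.7},
		},
	}
}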

41
api/proto/fps/model/detector.proto Normal file
View File

@@ -0,0 +1,41 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// detector.proto describes a service that detects items of interest in frames
// of data. It is used between the OBOS binary and the detection containers.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "fps/model/detection.proto";
import "fps/model/session.proto";
// Detect() runs against the frames in the request and returns any detections.
message DetectRequest {
int32 session_id = 1;
Datum datum = 3;
}
message DetectResponse {
repeated Detection detections = 2;
}
service Detector {
rpc NewSession(NewSessionRequest) returns (NewSessionResponse);
rpc Detect(DetectRequest) returns (DetectResponse) {}
rpc CloseSession(CloseSessionRequest) returns (CloseSessionResponse) {}
}
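
As a usage sketch, the session lifecycle around Detect() looks like this in Go, assuming the stubs generated from this file:

package client

import (
	"context"

	model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)

// detectOne runs a single Detect call inside a short-lived session:
// NewSession, Detect, CloseSession.
func detectOne(ctx context.Context, client model.DetectorClient, datum *model.Datum) (*model.DetectResponse, error) {
	sess, err := client.NewSession(ctx, &model.NewSessionRequest{})
	if err != nil {
		return nil, err // gRPC error if the model can't support a new session.
	}
	defer client.CloseSession(ctx, &model.CloseSessionRequest{SessionId: sess.GetSessionId()})

	return client.Detect(ctx, &model.DetectRequest{
		SessionId: sess.GetSessionId(),
		Datum:     datum,
	})
}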

149
api/proto/fps/model/geometry.proto Normal file
View File

@@ -0,0 +1,149 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// geometry.proto describes the basic geometry messages for detections.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "google/protobuf/duration.proto";
// When adding new messages, consider first if we should reuse an existing ROS
// message from here:
// https://github.com/foxglove/message-schemas/tree/main/proto/ros/geometry_msgs
//
// Reuse will make it easier if we start interacting directly with ROS.
// The ROS messages are mostly floating point, 3D, cartesian coordinate frames,
// so they aren't useful for pixel-oriented data.
// SpanTime is a span of time relative to the beginning of a time series.
message SpanTime {
google.protobuf.Duration offset = 1; // relative to the start of the data
google.protobuf.Duration duration = 2; // relative to the "begin" time
}
// Span1D is a 1D span in a time series data frame, measured in samples of the
// source data.
message Span1D {
reserved 1;
int32 begin = 2;
int32 end = 3; // begin <= end
}
// In all 2D messages, the x-axis is horizontal and the y-axis is vertical.
// The origin is at the top-left corner of the image.
// All points are normalized to [0, 1] x [0, 1] in the image.
// Point2D is a 2D point in an image.
message Point2D {
reserved 1;
double x = 2;
double y = 3;
}
// Size2D is a 2D size of an image.
message Size2D {
int32 width = 2;
int32 height = 3;
}
// Point3D is a 3D point in a scene.
message Point3D {
reserved 1;
double x = 2;
double y = 3;
double z = 4;
double score = 5; // Optional.
}
// Segment2D is a set of points in an image, usually only two. If more than two,
// they are ordered, but not necessarily collinear.
message Segment2D {
reserved 1;
repeated Point2D points = 2;
}
// BoundingBox2D is a rectangle in an image.
// Width and height are normalized to [0, 1] based on the max dimension of the image.
// For a point detection, set width and height to 0.
message BoundingBox2D {
reserved 1;
Point2D origin = 2; // Top-left corner, min x, min y.
double width = 3;
double height = 4;
}
// Wireframe2D is a collection of named segments, e.g., "forearm", "leg", etc.,
// usually representing a person.
//
// Note: Because a map is being used, segment names cannot be duplicated.
message Wireframe2D {
reserved 1;
map<string, Segment2D> segments = 2;
}
// PointCollection3D is a collection of named points in scene coordinates
// usually representing a person.
//
// Note: Because a map is being used, Point3D names cannot be duplicated.
message PointCollection3D {
reserved 1;
map<string, Point3D> points = 2;
}
// Fov2D is a 2D field of view in an image.
message Fov2D {
double horiz_deg = 1;
double vert_deg = 2;
}
// PointGeo is a 3D point given in geographical coordinates.
message PointGeo {
reserved 1;
double lat_deg = 2;
double long_deg = 3;
double alt_m = 4;
// TODO: covariance matrix?
}
// OrientationGeo describes the orientation of an object in the
// geographical coordinate frame.
message OrientationGeo {
reserved 1;
double yaw_deg = 2;
double pitch_deg = 3;
double roll_deg = 4;
}
// PoseGeo describes the position and orientation of an object in the
// geographical coordinate frame.
message PoseGeo {
reserved 1;
PointGeo position = 2;
// TODO: orientation as a quaternion or yaw/pitch/roll? Relative to 0,0,0 RHR?
}
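
To make the normalization conventions concrete, here is a small Go sketch converting a pixel-space rectangle into a BoundingBox2D as the comments above describe: point coordinates are normalized per axis, while box width/height are normalized by the image's max dimension. The helper name is illustrative.

package geometry

import model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"

// toBoundingBox2D converts a pixel-space rect (x, y, w, h) in an imgW x imgH
// image into the normalized form described above.
func toBoundingBox2D(x, y, w, h, imgW, imgH int) *model.BoundingBox2D {
	maxDim := float64(imgW)
	if imgH > imgW {
		maxDim = float64(imgH)
	}
	return &model.BoundingBox2D{
		Origin: &model.Point2D{
			X: float64(x) / float64(imgW), // points: per-axis [0, 1]
			Y: float64(y) / float64(imgH),
		},
		Width:  float64(w) / maxDim, // box size: normalized by max dimension
		Height: float64(h) / maxDim,
	}
}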

72
api/proto/fps/model/label.proto Normal file
View File

@@ -0,0 +1,72 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// label.proto describes a label applied to a frame of data.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "fps/model/annotation.proto";
import "fps/model/detection.proto";
import "fps/model/media.proto";
import "fps/model/model.proto";
// Label represents a single label for a frame of data. Labels are the top-level
// object we pass to trainers, so they encompass a variety of kinds of labels.
// A label may be manually created by a human, or it may be an approved suggestion
// by a suggester model, or it may be feedback on a detection, including
// possibly a manual correction.
message Label {
string uuid = 1; // A unique identifier for this label.
string class_uuid = 2; // Dataset class to which this label refers.
// True if this is a negative label. This is included in label, instead of
// annotation, to support marking negative regions in large images, in which
// case the `Label` must still support an annotation with a box or polygon.
bool negative = 3;
// Optional field that indicates that this label is part of a group of labels
// that has been derived from a single label. This is used to track the
// provenance of labels.
string parent_uuid = 4;
// The annotation field is ALWAYS considered to be valid. If this is a manually
// approved suggestion or detection, the detection's annotation is copied here.
// If this is a manually modified suggestion or detection, annotation is the
// modification. If annotation is empty, it indicates that the label
// refers to the entire frame. This is often the case for negative labels.
Annotation annotation = 10;
MediaKey media_key = 11; // The media to which this label refers.
// If this is feedback, model_key is the model that produced the detection or suggestion.
ModelKey model_key = 12;
Detection detection = 13; // The original detection if this is feedback.
// Feedback is set if this label is human feedback on a detection.
enum Feedback {
FEEDBACK_NOT_APPLICABLE = 0; // This isn't feedback. It's a human-generated label.
FEEDBACK_UNKNOWN = 1; // Feedback has not (yet) been provided on this detection.
FEEDBACK_CORRECT = 2; // The detection is correct.
FEEDBACK_INCORRECT = 3; // The detection is incorrect.
FEEDBACK_MODIFIED = 4; // The annotation is a corrected version of the detection.
FEEDBACK_UNCERTAIN = 5; // Unsure whether the detection is correct.
}
Feedback feedback = 20;
}
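
For example, a FEEDBACK_MODIFIED label carrying a human correction might be assembled like this in Go (a sketch; the uuid helper is github.com/google/uuid, and stub names are assumed from the go_package option):

package labels

import (
	"github.com/google/uuid"

	model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)

// feedbackLabel wraps a corrected detection per the semantics above: the
// annotation field carries the human's correction, and the original
// detection rides along for provenance.
func feedbackLabel(det *model.Detection, corrected *model.Annotation) *model.Label {
	return &model.Label{
		Uuid:       uuid.NewString(),
		ClassUuid:  det.GetClassUuid(),
		Annotation: corrected, // ALWAYS valid; here, the manual correction.
		MediaKey:   det.GetMediaKey(),
		Detection:  det,
		Feedback:   model.Label_FEEDBACK_MODIFIED,
	}
}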

326
api/proto/fps/model/media.proto Normal file
View File

@@ -0,0 +1,326 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// media.proto describes the media objects we pass around.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "fps/model/geometry.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
// Rational is a rational number, represented as a fraction, emulating ffmpeg.
message Rational {
int64 num = 1;
int64 den = 2;
}
// MediaSliceSpec is a reference to a particular slice of data within a
// container object.
//
// All fields are optional. These fields are typically used to extract a
// single frame (e.g., jpg) from a container (e.g., mp4).
message MediaSliceSpec {
// stream_index is an index to a stream in the object, if applicable.
int32 stream_index = 1;
// offset is a time offset into the object/stream.
google.protobuf.Duration offset = 2;
}
// MediaTransformSpec is a reference to a specific crop region within a media object.
message MediaTransformSpec {
// crop is a 2D rectangle in the object.
BoundingBox2D crop = 1;
// resize is a 2D size to which the object should be resized.
// If either dimension is 0, it is scaled to maintain the aspect ratio.
// If both crop and resize are specified, resizing occurs after cropping.
Size2D resize = 2;
}
// MediaConvertSpec requests a conversion of a media object to a different format.
message MediaConvertSpec {
enum Format {
NONE = 0; // 0 is the default value, so we make it a no-op.
MISB_JSON = 1;
TIFF = 2;
JPEG = 3;
PNG = 4;
GIF = 5;
}
Format format = 1;
}
// MediaFingerprint uniquely identifies the source media object at the URL.
// It contains enough information to know how the object was sampled and
// hashed so that it can be reproduced and compared later, even if
// we change the default way we sample or hash objects.
//
// This is used to detect changes to the object, e.g., if a file is
// replaced or added to. It can also match up objects that are the same
// but have different URLs, e.g., when a file is copied or moved.
//
// For efficiency, we might only sample pieces of the object to generate
// the fingerprint. The number of samples and their actual size may be chosen
// based on the object size and a desired sample size.
//
// Samples must not overlap, so small objects might have only a single sample.
// Samples always include the very beginning of the object. If there are at least
// two samples, the last sample always includes the very end of the object.
// Any additional samples are evenly distributed across the object.
message MediaFingerprint {
// object_size is the size of the object in bytes.
int64 object_size = 1;
// Method is what we did to generate the hash.
enum Method {
NONE = 0;
// This is currently the default and only method we know, but if we decide
// that we need something else later, we can add it here.
SUBSAMPLED_3x1024_XXHASH64 = 1;
}
Method method = 2;
// hash is a single hash taken over the samples of the object.
bytes hash = 3;
}
// MediaKey refers to a Media object resolvable via the Datalake.
// MediaKeys are intended to be consistent across time and nodes, so they can
// be stored as long-lived references in databases.
message MediaKey {
// url is a URL the datalake can resolve to access the object.
// This might be a local datalake path, something like:
// dl://localhost/path/to/object.mp4
// Or it might be from a remote datalake service, like:
// dl://<node-name>/path/to/object.mp4
// Or it might be a sensor URL, like:
// rtsp://camera.local:554/live
// Or it might be an HTTP URL, like:
// https://mybucket.s3.amazonaws.com/object.mp4
//
// Datalake URLs follow a set of rules defined by the datalake service,
// which is the only way to resolve them. They are opaque to most clients.
// If the key refers to a slice, this is the URL of the container from which
// the slice is retrieved.
string url = 1;
// fingerprint is provided for URLs that refer to a file that could move or change.
// The datalake uses this to discriminate between different versions of the same file.
MediaFingerprint fingerprint = 5;
// slice_spec is optional. If present, it indicates that the object
// at the url should be sliced and only the slice returned.
// Unlike the URL, the slice_spec is LEGIBLE to datalake clients in some contexts.
// E.g., when a media scrubber isolates a single frame from a stream,
// it's OK to craft a slice_spec for it. Clients can also infer information
// from the slice spec, e.g., the relative positions of frames over time.
// But in general, clients will only pass slice_specs that originated in a
// datalake streamer.
MediaSliceSpec slice_spec = 2;
// transform_spec is optional. If present, it indicates that the object
// at the url (& slice) should be cropped and/or resized.
// The transform_spec is LEGIBLE to datalake clients, who may use it to
// request a specific region or resizing of an image.
MediaTransformSpec transform_spec = 3;
// convert_spec is optional. If present, it indicates that the object
// at the url (& slice & transform) should be converted to a different format.
// The convert_spec is LEGIBLE to datalake clients, who may use it to
// request a specific data format. Obviously, not all formats are appropriate
// for all objects.
MediaConvertSpec convert_spec = 4;
}
// VideoStreamInfo contains information about a video stream.
message VideoStreamInfo {
int64 frame_count = 1; // May be 0 if unsupported by codec or live stream.
int32 width = 2;
int32 height = 3;
double fps = 4;
}
// AudioStreamInfo contains information about an audio stream.
message AudioStreamInfo {
int64 sample_count = 1;
double samples_per_second = 2;
int32 channels = 3;
}
// SubtitleStreamInfo contains information about a subtitle stream.
message SubtitleStreamInfo {}
// DataStreamInfo contains information about a data stream.
message DataStreamInfo {}
// AttachmentStreamInfo contains information about an attachment stream.
message AttachmentStreamInfo {}
// UnknownStreamInfo contains information about an unknown stream.
message UnknownStreamInfo {}
// StreamInfo describes a single stream in the container, e.g., video or audio.
message StreamInfo {
// index is the index of the stream in the container.
int32 index = 1;
// type is the IANA Media Type of the data in this stream, absent conversions.
// This is typically a stream type, e.g., streams from "video/mp4" might be
// "video/h264".
string type = 2;
// codec may be specified for stream data inside containers.
// E.g., "klv" when 'type' is "application/octet-stream".
string codec = 3;
// avg_frame_rate is the average frame rate of the stream, per
// ffmpeg AVStream.avg_frame_rate.
Rational avg_frame_rate = 4;
// real_base_frame_rate is the lowest framerate with which all timestamps can be
// represented accurately, per ffmpeg AVStream.r_frame_rate
Rational real_base_frame_rate = 5;
// duration is the duration of the stream, if known.
google.protobuf.Duration duration = 6; // May be 0 if unsupported by codec or live stream.
// metadata is a set of key-value pairs encoded in the container,
// e.g., the language of an audio stream.
map<string, string> metadata = 7;
// start_offset is the earliest PTS of the stream. Typically 0, but not always.
// If we're extracting a random frame from this stream by using a slice offset,
// the range of valid offsets is:
// [start_offset, start_offset + duration)
// Because the duration includes the duration of the last frame, the last frame
// is actually at start_offset + duration - (1/fps).
google.protobuf.Duration start_offset = 8;
// time_base is the time base of the stream expressed as a fraction of seconds.
// This is just useful for understanding the resolution of the timestamps.
Rational time_base = 9;
oneof stream {
VideoStreamInfo video = 11;
AudioStreamInfo audio = 12;
UnknownStreamInfo unknown = 13;
SubtitleStreamInfo subtitle = 18;
DataStreamInfo data = 19;
AttachmentStreamInfo attachment = 20;
}
}
// MediaFileInfo is information about the file containing the media.
message MediaFileInfo {
// name is the base file name, which may be useful for display purposes.
// The full path is not exposed.
string name = 1;
int64 size = 2;
google.protobuf.Timestamp modified = 3;
}
// MediaInfo is information about a piece of media.
message MediaInfo {
// type is the IANA Media Type (previously "MIME Type"), e.g., "video/mp4".
string type = 1;
// codec is optional and may be specified for stream data inside containers.
// E.g., "klv" when 'type' is "application/octet-stream".
string codec = 2;
// duration will be 0 if unsupported by codec or live stream.
google.protobuf.Duration duration = 3;
// is_container is true if this media object is a container of other media
// objects, e.g., an mp4 file.
bool is_container = 4;
// file_info is optional and generally omitted if the media is not a file.
// Only valid for source/container objects backed by an actual file.
MediaFileInfo file_info = 5;
enum PictureType {
PICTURE_TYPE_NONE = 0; ///< Undefined
PICTURE_TYPE_I = 1; ///< Intra
PICTURE_TYPE_P = 2; ///< Predicted
PICTURE_TYPE_B = 3; ///< Bi-dir predicted
PICTURE_TYPE_S = 4; ///< S(GMC)-VOP MPEG-4
PICTURE_TYPE_SI = 5; ///< Switching Intra
PICTURE_TYPE_SP = 6; ///< Switching Predicted
PICTURE_TYPE_BI = 7; ///< BI type
};
PictureType picture_type = 6;
// is_keyFrame is true if this media object is a key frame.
bool is_keyFrame = 7;
// is_seekable is true if this media object can be randomly seeked or streamed.
// This is false for things like unrecorded live streams.
bool is_seekable = 8;
// streams is a set of streams contained in this container-type object.
// May contain valid information even when is_container is false.
map<int32, StreamInfo> streams = 10;
}
// MediaBytes contains the raw bytes of a media object.
// Only used for tiny payloads like caption text or klv that won't overflow
// the gRPC message size limit.
message MediaBytes {
bytes data = 1;
}
// MediaPath points to a file relative to an agreed-upon root.
message MediaPath {
string path = 1;
// TODO(casey): Think about adding an expiration timestamp if this path
// is a temporary cache file.
}
// MediaFD indicates that this media object was provided alongside a file
// descriptor, out-of-band.
message MediaFD {}
// Media is a piece of media on which we're operating. It may be a container
// (e.g., mp4) or a single frame (e.g., jpg). Single frames might be standalone
// files or slices from a container/parent object. The presence of a slice_spec
// in the key indicates whether this is a slice.
message Media {
// key is a reference to the media object.
MediaKey key = 1;
// info is metadata about this media object.
MediaInfo info = 2;
// container_info is optional metadata about the container if this is a slice.
MediaInfo container_info = 3;
// format is the format of the data. Optional.
// TODO(casey): I think we don't actually need this anymore...
oneof format {
MediaBytes media_bytes = 10;
MediaPath media_path = 11;
MediaFD media_fd = 12;
}
reserved 13;
}
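
As an illustration of the sampling scheme MediaFingerprint describes, here is a hedged Go sketch of the SUBSAMPLED_3x1024_XXHASH64 method: three non-overlapping 1 KiB samples (start, middle, end) fed into a single xxhash64 via github.com/cespare/xxhash/v2. The exact sample layout is an assumption; only the comments above are authoritative.

package fingerprint

import (
	"io"
	"os"

	"github.com/cespare/xxhash/v2"

	model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)

// computeFingerprint hashes up to three disjoint 1 KiB samples: the very
// beginning, the middle, and the very end of the file.
func computeFingerprint(f *os.File, size int64) (*model.MediaFingerprint, error) {
	const sampleSize = 1024
	offsets := []int64{0, size/2 - sampleSize/2, size - sampleSize}
	if size <= 3*sampleSize {
		offsets = []int64{0} // Too small for disjoint samples; hash from the start.
	}
	h := xxhash.New()
	buf := make([]byte, sampleSize)
	for _, off := range offsets {
		n, err := f.ReadAt(buf, off)
		if err != nil && err != io.EOF {
			return nil, err
		}
		h.Write(buf[:n])
	}
	return &model.MediaFingerprint{
		ObjectSize: size,
		Method:     model.MediaFingerprint_SUBSAMPLED_3x1024_XXHASH64,
		Hash:       h.Sum(nil),
	}, nil
}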

72
api/proto/fps/model/migrator.proto Normal file
View File

@@ -0,0 +1,72 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// migrator.proto describes a service that a model can implement to allow
// upgrades and portability of model internals, like weights and tuning.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
// IngestFileCopy and IngestFileResult are streamed.
// The server initially sends an IngestFileCopy specifying a file path to
// retrieve from the other model container, and a destination path to write it
// inside the server model container. The client conducts the copy operation and
// then sends an IngestFileResult to indicate completion or an error.
//
// NOTE: This approach is modeled on what the Docker Client API provides us:
// https://pkg.go.dev/github.com/docker/docker/client#Client.CopyFromContainer
// https://pkg.go.dev/github.com/docker/docker/client#Client.CopyToContainer
//
// The Docker API copies directories recursively.
message IngestFileResult {
string source_path = 1; // Path inside the other container.
string dest_path = 2; // Path inside this container.
string error = 3; // The model may choose to ignore or fail.
}
message IngestFileCopy {
string source_path = 1; // Path inside the other container.
string dest_path = 2; // Path inside this container.
}
// ExportResponse contains a list of files that can be copied to a clean
// copy of the base image of the same version to produce a copy of the model.
// These files will be copied out of the running container, so no other
// calls should be made to the model until the files are copied.
// NOTE: It's up to the caller (obos) to preserve external metadata
// such as container image labels, generally by injecting a metadata file.
message ExportRequest {}
message ExportResponse {
repeated string paths = 1;
}
// Models with disk state that changes over time may implement the Migrator
// service to allow for preservation of state across nodes and base image upgrades.
service Migrator {
// Ingest directs the model to ingest files from another image based on the
// same architecture. Typically, this will be an earlier, incompatible version
// from which we want to extract weights, config, and other relevant data.
// After this call returns, the model will be committed as a new image.
// Note: For this call the server sends the requests and the client sends the
// responses.
rpc Ingest(stream IngestFileResult) returns (stream IngestFileCopy) {}
// Export allows a model to be transferred to other hosts
// without having to copy the entire container. It should be effectively like
// copying the later layers of the docker image vs. the base image.
// Export only needs to support copies to the same base image version.
rpc Export(ExportRequest) returns (ExportResponse) {}
}
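
A sketch of the client side of Ingest, where the server drives: it streams IngestFileCopy instructions, and the client answers each with an IngestFileResult. Stub names are assumed from the go_package option; copyFn stands in for the Docker copy plumbing.

package migrate

import (
	"context"
	"io"

	model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)

// runIngest answers each IngestFileCopy from the server with an
// IngestFileResult, reporting per-file errors instead of aborting.
func runIngest(ctx context.Context, client model.MigratorClient, copyFn func(src, dst string) error) error {
	stream, err := client.Ingest(ctx)
	if err != nil {
		return err
	}
	for {
		instr, err := stream.Recv()
		if err == io.EOF {
			return stream.CloseSend() // Server has no more files to copy.
		}
		if err != nil {
			return err
		}
		result := &model.IngestFileResult{
			SourcePath: instr.GetSourcePath(),
			DestPath:   instr.GetDestPath(),
		}
		if copyErr := copyFn(instr.GetSourcePath(), instr.GetDestPath()); copyErr != nil {
			result.Error = copyErr.Error() // The model may choose to ignore or fail.
		}
		if err := stream.Send(result); err != nil {
			return err
		}
	}
}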

73
api/proto/fps/model/misb0601.proto Normal file
View File

@@ -0,0 +1,73 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// misb0601.proto describes objects that are used in the MISB KLV metadata
// standard. The objects are defined in the MISB ST0601 standard.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "fps/model/geometry.proto";
import "google/protobuf/timestamp.proto";
message MISBPlatform {
OrientationGeo orientation = 1;
}
message MISBSensor {
PointGeo location = 1;
Fov2D fov = 2;
OrientationGeo relative_orientation = 4;
Point2D velocity_mps = 5;
}
message MISBTarget {
PointGeo location = 1;
double width_m = 2;
uint32 track_gate_pxls_width = 3;
uint32 track_gate_pxls_height = 4;
double ground_range_m = 5;
string id = 6;
OrientationGeo relative_orientation = 7;
}
message MISBPoi {
string id = 1;
PointGeo location = 2;
OrientationGeo relative_orientation = 3;
}
message MISBImage {
string sensor_type = 1;
string coordinate_system = 2;
double slant_range_m = 3;
PointGeo center = 4;
PointGeo upper_left = 5;
PointGeo upper_right = 6;
PointGeo lower_left = 7;
PointGeo lower_right = 8;
}
message MISB0601 {
google.protobuf.Timestamp time = 1;
MISBPlatform platform = 2;
MISBSensor sensor = 3;
MISBImage image = 4;
MISBTarget target = 5;
repeated MISBPoi pois = 6;
}

47
api/proto/fps/model/model.proto Normal file
View File

@@ -0,0 +1,47 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// model.proto includes messages that describe a model in the FPS system, for
// both inter-OBOS and UI websocket comms.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
// ModelKey uniquely identifies a model and gives it a human name.
// Note that different implementations of the same model may have different
// docker hashes (e.g., different targets), so this is the unique key.
message ModelKey {
string title = 1;
string version = 2;
string vendor = 3;
string data_set_uuid = 4; // UUID of the data set used to train the model.
}
// ModelInfo describes the characteristics of a model.
message ModelInfo {
ModelKey key = 1;
repeated string tags = 2; // Optional; Usually tags from the docker repo or elsewhere.
map<string, string> labels = 3; // Key:Value labels, defined in our Labels doc.
string model_id = 4; // Docker Shortened Image Hash. Unique ID for this model instance.
}
// ModelStatus describes the real-time status of an active model instance,
// including any statistics.
message ModelStatus {
ModelInfo info = 1;
}

27
api/proto/fps/model/node_key.proto Normal file
View File

@@ -0,0 +1,27 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// node_key.proto describes the node key message used in the FPS for
// both inter-OBOS and UI websocket comms.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
message NodeKey {
bytes ip = 1;
}

259
api/proto/fps/model/parameters.proto Normal file
View File

@@ -0,0 +1,259 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// parameters.proto provides the messages necessary both to describe the
// parameters a particular image expects, and to encode the values representing
// a "trained" image, where the user has chosen values and we are ready to
// begin detecting, etc.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
// https://protobuf.dev/reference/protobuf/google.protobuf
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "fps/model/media.proto";
// Parameter definitions - This section is concerned with describing a parameter
// such that the UI can easily render something for the user to enter a value.
// Simple data types where the expectation is a single value:
enum ParameterType {
TYPE_UNKNOWN = 0;
TYPE_STRING = 1;
TYPE_ENUM = 2;
TYPE_INT = 3;
TYPE_DOUBLE = 4;
TYPE_TIMESTAMP = 5;
TYPE_DURATION = 6;
TYPE_MEDIA_KEY = 7;
TYPE_ANNOTATED_DATASET = 8;
TYPE_FILE_UPLOAD = 9;
TYPE_BOOLEAN = 10;
}
// ParameterInfo is meant to include both the "name", which represents the
// identifier the trainer understands, and the "description", which is more
// descriptive text to be displayed in the UI.
message ParameterInfo {
string name = 1;
string description = 2;
}
message ParameterListType {
ParameterType element_type = 1;
}
// The ParameterTableType is intended to be used when the model needs parameters
// in a tabular format, where there are repeating values, broken into distinct
// columns. The map in the type represents the columns that appear in the "table",
// and their respective data types.
message ParameterTableType {
map<string, ParameterType> columns = 1;
}
// An ENUM allows the user to specify a canned set of choices for a particular
// parameter. Instead of a free-form field, the user will be confined to a set
// of pre-defined values. In the definition of the ENUM, the `info` field
// will indicate what is displayed to the user in the list, while the `value`
// field will have the actual value that needs to be passed to the model for
// that choice.
message ParameterEnumEntry {
ParameterInfo info = 1;
ParameterValue value = 2;
}
// There is no separate value structure for enums. The expectation is that for
// parameters with `allow_multiselect=False`, the single value chosen
// will be put into the `scalar_value` field, while if `allow_multiselect=True`,
// the values will be placed in the `list_value` field.
message ParameterEnumType {
ParameterType enum_type = 1;
repeated ParameterEnumEntry choices = 2;
bool allow_multiselect = 3;
}
// The "definition" of an actual parameter, this is where we give it a name,
// meaningful to the model, and a description, meaningful to the user, and
// the data type.
message ParameterDefinition {
ParameterInfo info = 1;
bool required = 2;
oneof type {
ParameterType scalar_type = 3;
ParameterListType list_type = 4;
ParameterTableType table_type = 5;
ParameterEnumType enum_type = 6;
}
}
// A definition "set" is a named collection of parameters, really just a
// container with some additional information, which could be useful to the
// user.
// A ParameterDefinitionSet is the base type encoded in the Docker
// label, describing the set of parameters a particular image expects.
message ParameterDefinitionSet {
ParameterInfo info = 1;
repeated ParameterDefinition parameters = 2;
}
message FileValue {
string name = 1;
string mime_type = 2;
bytes content = 3;
}
// Parameter values - This section is concerned with the encoding of
// the values of the parameters a particular model expects.
// The encoding of the value of our basic data types described above.
message ParameterValue {
oneof value {
string string_value = 2;
double double_value = 3;
int64 int_value = 4;
google.protobuf.Timestamp timestamp_value = 5;
google.protobuf.Duration duration_value = 6;
MediaKey media_key_value = 7;
FileValue file_value = 8;
bool bool_value = 9;
}
}
// The ParameterList message is a repeating list of values of the same data type.
// One example would be passing in a list of strings representing
// the names of the objects you are looking for:
//
// ==> ParameterDefinitionSet
// {
// "info": {
// "name": "Model-With-A-List",
// "description": "A simple example of using a list parameter"
// },
// "parameters": [
// {
// "info": {
// "name": "object_names",
// "description": "List of names of the objects to be found"
// },
// "required": true,
// "listType": { "elementType": "TYPE_STRING" }
// }
// ]
// }
//
// ==> SessionParameters
// {
// "values": {
// "object_names": {
// "list": {
// "values": [
// { "stringValue": "boat" },
// { "stringValue": "cruiser" },
// { "stringValue": "destroyer" },
// { "stringValue": "battleship" },
// { "stringValue": "carrier" },
// ]
// }
// }
// }
// }
message ParameterList {
repeated ParameterValue values = 2;
}
// The ParameterTable and ParameterRow messages are used to represent data for
// a ParameterTableType. It is a repeating list of maps of column name to value.
// One example could be capturing a more detailed list of object
// classes that includes a description field, for more textual context.
//
// ==> ParameterDefinitionSet
// {
// "info": {
// "name": "Table-Parameter-Example",
// "description": "Example of using a ParameterTableType input message"
// },
// "parameters": [
// {
// "info": {
// "name": "object_classes",
// "description": "List of object classes consisting of [name, description]"
// },
// "required": true,
// "tableType": {
// "columns": { "name": "TYPE_STRING", "description": "TYPE_STRING" }
// }
// }
// ]
// }
//
// And the corresponding session data:
// ==> SessionParameters
// {
// "values": {
// "object_classes": {
// "table": {
// "rows": [
// {
// "values": {
// "name": { "stringValue": "dog" },
// "description": { "stringValue": "a domesticated dog, but not wild dogs, such as
// wolves." }
// }
// },
// {
// "values": {
// "name": { "stringValue": "cat" },
// "description": { "stringValue": "a domesticated cat, but not wild cats, such as
// bobcats." }
// }
// }
// ]
// }
// }
// }
// }
//
// For more examples, see
// https://github.com/TurbineOne/fps-models-research/t1/experimental/params/examples.py
message ParameterRow {
map<string, ParameterValue> values = 1;
}
message ParameterTable {
repeated ParameterRow rows = 1;
}
// The Parameter message is the core, named parameter for a particular image.
// It has a name, description, and describes either a simple, scalar value, a
// list of values, or a table.
message Parameter {
oneof entry {
ParameterValue value = 1;
ParameterList list = 2;
ParameterTable table = 3;
}
}
// SessionParameters is just a wrapper type, for the session parameters label
// on the Docker image. The label will contain a JSON serialized version of
// this object.
message SessionParameters {
map<string, Parameter> values = 1;
}
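
Since the Docker label is a JSON serialization of SessionParameters, producing one from Go is a protojson call, sketched below with the "object_names" list example from earlier (stub names assumed from the go_package option):

package params

import (
	"google.golang.org/protobuf/encoding/protojson"

	model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)

// sessionLabel builds the "object_names" list example above and serializes
// it the way the Docker label expects.
func sessionLabel() ([]byte, error) {
	params := &model.SessionParameters{
		Values: map[string]*model.Parameter{
			"object_names": {
				Entry: &model.Parameter_List{
					List: &model.ParameterList{
						Values: []*model.ParameterValue{
							{Value: &model.ParameterValue_StringValue{StringValue: "boat"}},
							{Value: &model.ParameterValue_StringValue{StringValue: "carrier"}},
						},
					},
				},
			},
		},
	}
	return protojson.Marshal(params)
}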

50
api/proto/fps/model/session.proto Normal file
View File

@@ -0,0 +1,50 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// session.proto defines messages for setting up and tearing down sessions
// for use with unary gRPC calls that require statefulness.
//
// We've opted to use session setups instead of streaming RPCs because
// it's just much easier to build unary servers in Python, which is the most
// common implementation of these APIs.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
// NewSession() returns a session ID when the server is ready to accept requests.
// May block until the model is initialized and ready to accept calls.
// Returns a gRPC error if it can't support a new session.
// Parameters are intended for fps models/workflows that support run-time
// arguments, such as open-vocabulary models or example-shot workflows.
// Can be left blank if unused, but if used it should match the pydantic class
// defined by the model_parameters field in framework.toml.
// Upstream, these are populated by AgentDetectRequest.
message NewSessionRequest {
string parameters = 1;
}
message NewSessionResponse {
int32 session_id = 1;
}
// CloseSession() releases the session and any associated memory.
// Only called if an instance is expected to serve multiple
// sessions concurrently and/or outlive the first session.
message CloseSessionRequest {
int32 session_id = 1;
}
message CloseSessionResponse {}

60
api/proto/fps/model/suggester.proto Normal file
View File

@@ -0,0 +1,60 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// suggester.proto describes a service that detects items of interest in
// media given to the suggester.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "fps/model/detection.proto";
import "fps/model/label.proto";
import "fps/model/media.proto";
import "fps/model/session.proto";
message Seed {
string class_id = 1;
oneof seed {
string description = 3;
Label label = 4;
}
}
message SuggestRequest {
int32 session_id = 1;
repeated Seed seeds = 2;
repeated MediaKey media_keys = 4;
}
message Suggestion {
// seed_ref_index is canonical. Ignore the class_uuid sent in the Detection.
int32 seed_ref_index = 1;
Detection detection = 4;
}
message SuggestResponse {
repeated Suggestion suggestions = 1;
}
service Suggester {
rpc NewSession(NewSessionRequest) returns (NewSessionResponse);
rpc Suggest(SuggestRequest) returns (SuggestResponse) {}
rpc CloseSession(CloseSessionRequest) returns (CloseSessionResponse) {}
}
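
To show how seeds and suggestions line up, a hedged Go sketch of one Suggest round using a free-text description seed (stub names assumed from the go_package option):

package suggest

import (
	"context"
	"log"

	model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)

// suggestOnce seeds a session with one class description and logs the
// suggestions, which refer back to seeds by index.
func suggestOnce(ctx context.Context, client model.SuggesterClient, keys []*model.MediaKey) error {
	sess, err := client.NewSession(ctx, &model.NewSessionRequest{})
	if err != nil {
		return err
	}
	defer client.CloseSession(ctx, &model.CloseSessionRequest{SessionId: sess.GetSessionId()})

	resp, err := client.Suggest(ctx, &model.SuggestRequest{
		SessionId: sess.GetSessionId(),
		Seeds: []*model.Seed{{
			ClassId: "boat",
			Seed:    &model.Seed_Description{Description: "small watercraft"},
		}},
		MediaKeys: keys,
	})
	if err != nil {
		return err
	}
	for _, s := range resp.GetSuggestions() {
		// seed_ref_index is canonical; ignore class_uuid in the Detection.
		log.Printf("seed %d: score %.2f", s.GetSeedRefIndex(), s.GetDetection().GetScore())
	}
	return nil
}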

139
api/proto/fps/model/trainer.proto Normal file
View File

@@ -0,0 +1,139 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// trainer.proto describes a service that trains a model.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model";
package t1.fps.model;
import "fps/model/label.proto";
import "fps/model/parameters.proto";
message TrainResponse {
string status = 1;
}
message TrainRequest {
string base_dir = 1;
map<string, string> args = 2; // key:value pairs for trainer args, e.g., seed.
}
// Deprecated; replaced by Trainer2.
service Trainer {
rpc Train(TrainRequest) returns (stream TrainResponse) {}
}
// Resources describes limits the model should impose on itself.
// We may enforce these limits from outside if the capability exists.
message Resources {
int64 gpu_memory = 1; // Bytes.
}
// TrainerConfig contains configuration information. When a field is unset,
// the model should keep the current configuration or use a default value.
// Therefore, no field in this message should treat zero as a meaningful value,
// because in proto3 an unset scalar is indistinguishable from zero.
// If zero must be valid, use a sub-message.
//
// This is distinct from any configuration for detectors, suggesters, etc.
//
// The `parameters` field represents all the data needed to become a "trained" image,
// in addition to media and labels passed as a part of the autoML loop.
message TrainerConfig {
map<string, string> class_uuid_to_name = 1;
map<string, Parameter> parameters = 2;
}
// Progress describes the progress of any time-consuming operation.
// These may be arrayed to express nested stage progress.
message Progress {
string stage = 1;
float progress = 2; // [0.0, 1.0]
}
message StartRequest {
Resources limits = 1;
}
message StartResponse {}
message EvictRequest {}
message EvictResponse {}
message CommitRequest {}
message CommitResponse {}
message ConfigRequest {
TrainerConfig config = 1;
}
message ConfigResponse {
TrainerConfig config = 1;
}
message LabelDataAddRequest {
repeated Label labels = 1;
}
message LabelDataAddResponse {}
message LabelDataRemoveRequest {
repeated string label_uuids = 1; // UUIDs refer to previously added labels.
}
message LabelDataRemoveResponse {}
message TrainRequest2 {}
// TrainResponse is streamed from the model.
message TrainResponse2 {
float self_score = 1; // [0.0, 1.0] indicating how well the model thinks it's doing.
repeated Progress progress = 2; // Progress of the training cycle, arbitrarily deep stages.
string info = 3; // Human-readable status for deep-dive UIs.
string warning = 4; // Human-readable warnings that may interrupt at the UI. Use sparingly.
}
// The Trainer service represents a trainable model.
// Calls to the trainer are fully async except for the synchronization-related
// calls (Start, Evict, Commit). The client won't call those while another
// call is outstanding.
service Trainer2 {
// Start allows the trainer to start consuming resources and writing to disk
// in the background or as needed. No other RPCs may be called until Start
// returns. (This is independent of other services offered by the container,
// such as the Detector service.)
//
// NOTE: We may sometimes start a container without wanting it to actually
// do anything. For example, we must start it to copy files out for upgrade.
// In this case, we will not call Start() on this API.
rpc Start(StartRequest) returns (StartResponse) {}
// Evict asks the model to release its resources temporarily.
// It may resume when Start() is called.
rpc Evict(EvictRequest) returns (EvictResponse) {}
// Commit asks the model to save its state to disk. Writes must be
// finished when the call returns, and the disk must not be written again
// until Start() is called.
rpc Commit(CommitRequest) returns (CommitResponse) {}
// Config modifies configuration inside the trainer. The trainer should
// ignore any unset fields in the Config structure, and return its full
// current configuration.
rpc Config(ConfigRequest) returns (ConfigResponse) {}
// LabelDataAdd and LabelDataRemove add and remove label data from the
// training set.
rpc LabelDataAdd(LabelDataAddRequest) returns (LabelDataAddResponse) {}
rpc LabelDataRemove(LabelDataRemoveRequest) returns (LabelDataRemoveResponse) {}
// Train starts a training cycle.
rpc Train(TrainRequest2) returns (stream TrainResponse2) {}
}
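
A sketch of one training cycle against Trainer2, consuming the streamed progress (stub names assumed from the go_package option):

package train

import (
	"context"
	"io"
	"log"

	model "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)

// trainOnce runs Start, a full Train cycle, then Commit to flush state.
func trainOnce(ctx context.Context, client model.Trainer2Client) error {
	if _, err := client.Start(ctx, &model.StartRequest{}); err != nil {
		return err
	}
	stream, err := client.Train(ctx, &model.TrainRequest2{})
	if err != nil {
		return err
	}
	for {
		resp, err := stream.Recv()
		if err == io.EOF {
			break // Training cycle complete.
		}
		if err != nil {
			return err
		}
		for _, p := range resp.GetProgress() {
			log.Printf("stage %q: %.0f%%", p.GetStage(), p.GetProgress()*100)
		}
	}
	// Commit: writes must be finished when this returns, and no further disk
	// writes may happen until Start() is called again.
	_, err = client.Commit(ctx, &model.CommitRequest{})
	return err
}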

135
api/proto/fps/service/framer.proto Normal file
View File

@@ -0,0 +1,135 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// framer.proto describes a service that consumes media streams or files
// and produces a stream of frames.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/service";
package t1.fps.service;
import "google/protobuf/duration.proto";
import "fps/source.proto";
import "fps/model/media.proto";
// FrameRequest is a request to start and continue a framing session.
// The embedded messages must be sent in order:
// 1. Start
// 2. Tap
// 3. NextFrames (repeated)
//
// Optional:
// - Seek. This can be added to any other request, but currently
// is a no-op if added to a NextFrames request.
message FrameRequest {
// The first request MUST be a Start request. The first response will be a
// Start response. The Start response will contain a list of the streams
// available from the source, mapped by source stream index.
message Start {
// url is the URL of the source to frame. This can be a file path relative
// to the /media mount, or an absolute file, http, or rtsp URL. Aside from
// some path cleaning for file URLs, this is passed directly to ffmpeg.
string url = 1;
// file_framing is only valid if the url points to a file or directory.
SourceFileFraming file_framing = 2;
// is_live tells framer whether to treat the source as a live stream.
bool is_live = 3;
}
Start start = 1;
// The second request MUST be a Tap request. The second response will be a
// Tap response.
message Tap {
// sample describes how to sample frames from the source, if desired.
SourceSample sample = 1;
// stream_indices identify the subset of streams which NextFrames requests
// will return. At least one stream MUST be specified.
repeated int32 stream_indices = 2;
reserved 3;
// output_path is the path of the directory to write frames to. Once frames
// are returned in a NextFrames response, it is the caller's responsibility
// to clean up the underlying files. If the output_path is not specified,
// decoded frames will be returned in-band.
string output_path = 4;
}
Tap tap = 2;
// The third and subsequent requests MUST be NextFrames requests. The
// corresponding responses will be NextFrames responses.
message NextFrames {
// poll requests an immediate response, even if no frames are pending.
bool poll = 1;
}
NextFrames next_frames = 3;
// The optional Seek message can be added to any other request, but is
// currently a no-op if added to a NextFrames request.
// This call only makes sense if the source is a file.
message Seek {
// time is the offset into the file to seek to.
google.protobuf.Duration time = 1;
// closest_keyframe indicates whether to seek to the closest keyframe
// before the specified time. If false, the seek will be to the exact
// time (plus or minus half a frame). Seeking to a keyframe is faster,
// but may not be as accurate since the keyframe may be before the specified time.
bool closest_keyframe = 2;
}
Seek seek = 4;
}
// FrameResponse is a response to a FrameRequest. Each field aligns with the
// corresponding request field.
message FrameResponse {
message Start {
// media_info is the media information for the source.
model.MediaInfo media_info = 1;
}
Start start = 1;
message Tap {}
Tap tap = 2;
message NextFrames {
reserved 1;
// medias may contain multiple temporally-related frames, e.g., an image
// and corresponding camera pose or image coordinates. Indices into this
// array DO NOT correspond to source stream indices.
//
// May contain no frames if the poll flag was set in the request,
// or in certain internal error conditions.
repeated model.Media medias = 2;
}
NextFrames next_frames = 3;
}
// Framer represents a service that converts a media stream or file into a
// series of frames.
//
// The Framer operates by sessions, so multiple sessions can share a single
// network stream and receive frames independently at different rates, while
// the stream is also being recorded. The Framer only stops consuming a stream
// when all the corresponding framing and recording sessions have stopped.
service Framer {
rpc Frame(stream FrameRequest) returns (stream FrameResponse);
}
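The Start → Tap → NextFrames ordering maps directly onto the bidirectional Frame stream. Below is a minimal client session sketch, using the go_package above and the default unix socket set up in cmd/framer/main.go later in this commit; the URL and stream index are placeholder assumptions.

package example

import (
	"context"
	"fmt"
	"log"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/service"
)

func frameOnce(ctx context.Context) error {
	conn, err := grpc.NewClient("unix:///service-socket/framer.sock",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		return err
	}
	defer conn.Close()

	stream, err := service.NewFramerClient(conn).Frame(ctx)
	if err != nil {
		return err
	}

	// 1. Start identifies the source; the response describes its streams.
	if err := stream.Send(&service.FrameRequest{
		Start: &service.FrameRequest_Start{Url: "media/sample.mp4"},
	}); err != nil {
		return err
	}
	startResp, err := stream.Recv()
	if err != nil {
		return err
	}
	log.Printf("media info: %v", startResp.GetStart().GetMediaInfo())

	// 2. Tap selects at least one stream. output_path is unset here, so
	// decoded frames come back in-band.
	if err := stream.Send(&service.FrameRequest{
		Tap: &service.FrameRequest_Tap{StreamIndices: []int32{0}},
	}); err != nil {
		return err
	}
	if _, err := stream.Recv(); err != nil {
		return err
	}

	// 3. NextFrames, repeated. For files, a plain io.EOF from Recv() is the
	// normal end of the session.
	for {
		if err := stream.Send(&service.FrameRequest{
			NextFrames: &service.FrameRequest_NextFrames{},
		}); err != nil {
			return err
		}
		resp, err := stream.Recv()
		if err != nil {
			return err
		}
		fmt.Println("medias:", len(resp.GetNextFrames().GetMedias()))
	}
}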


@@ -0,0 +1,95 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// source.proto defines messages that describe a source of data in the FPS system,
// for both inter-OBOS and UI websocket comms. It's a superset of "sensor"
// that includes things like files.
syntax = "proto3";
option go_package = "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps";
package t1.fps;
import "fps/model/media.proto";
import "google/protobuf/duration.proto";
// SourceSample describes how to sample frames from a source.
// This applies equally to file and network stream sources.
// Sampling is applied per-stream to the primary streams.
message SourceSample {
oneof sample {
// time_interval indicates that returned frames will be spaced
// by AT LEAST the given interval (e.g. 1/10 s for 10 fps) based on the
// presentation timestamp. Frames arriving any faster will be dropped.
google.protobuf.Duration time_interval = 1;
// frame_interval indicates that every nth frame should be returned.
// The nth frame of any primary stream will trigger a frame to be returned.
int32 frame_interval = 2;
}
}
// SourceFileFraming describes how to extract frames from a file when the source
// is a file or files. Does not apply to real-time network streams.
message SourceFileFraming {
// FileThrottle can be combined with SourceSample to control the pace of
// frames and limit CPU consumption.
enum FileThrottle {
// FILE_THROTTLE_NONE indicates that files will be read as fast as possible.
// Frames will almost certainly be dropped.
FILE_THROTTLE_NONE = 0;
// FILE_THROTTLE_PRESENTATION_RATE indicates that frames will be returned
// at the presentation rate. Frames will be dropped if the pipeline does
// not consume them fast enough.
FILE_THROTTLE_PRESENTATION_RATE = 1;
// FILE_THROTTLE_LOCK_STEP indicates that frames will be returned as
// fast as they are consumed by the pipeline.
FILE_THROTTLE_LOCK_STEP = 2;
}
FileThrottle throttle = 1;
reserved 2, 3;
}
// SourceDirectoryIngest describes how to ingest a directory of files.
// Only applies when the source URL is a directory.
// SourceFileFraming is then applied to each file in the directory, individually.
message SourceDirectoryIngest {
enum FileSelection {
FILE_SELECT_ALL = 0; // Default. All existing and new files. Runs until stopped.
FILE_SELECT_ONLY_NEW = 1; // Only new files since the ingest started. Runs until stopped.
FILE_SELECT_ONLY_EXISTING = 2; // Only files that existed when the ingest started. Terminates.
}
FileSelection file_selection = 1; // Applies equally to files and subdirectories, if not ignored.
// ignore_subdirs indicates that subdirectories (whether existing or new)
// should not be consumed.
bool ignore_subdirs = 2;
// ignore_regex ignores files matching this regex, unless empty.
string ignore_regex = 3;
}
// Source represents a source of data and how it is to be ingested.
message Source {
model.MediaKey key = 1;
SourceSample sample = 2;
SourceFileFraming file_framing = 3;
reserved 4;
SourceDirectoryIngest directory_ingest = 5;
}
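As a concrete pairing of these messages, sampling at most one frame per second from a file in lock-step (so decoding never outruns the consumer) could look like the sketch below, assuming the standard protoc-gen-go mapping of the field, oneof, and enum names above.

package example

import (
	"time"

	"google.golang.org/protobuf/types/known/durationpb"

	fps "github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps"
)

// lockStepSampling returns a sample/framing pair: frames at least one second
// apart by presentation timestamp, read only as fast as the pipeline consumes.
func lockStepSampling() (*fps.SourceSample, *fps.SourceFileFraming) {
	sample := &fps.SourceSample{
		Sample: &fps.SourceSample_TimeInterval{
			TimeInterval: durationpb.New(time.Second),
		},
	}
	framing := &fps.SourceFileFraming{
		Throttle: fps.SourceFileFraming_FILE_THROTTLE_LOCK_STEP,
	}
	return sample, framing
}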

cmd/framer/config.go Normal file

@@ -0,0 +1,78 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package main
import (
"errors"
"fmt"
"os"
"path/filepath"
"github.com/TurbineOne/ffmpeg-framer/pkg/config"
"github.com/TurbineOne/ffmpeg-framer/pkg/framer"
"github.com/TurbineOne/ffmpeg-framer/pkg/logger"
)
const (
configFileName = "config.yaml"
)
//nolint:gochecknoglobals // Needed for makefile injection.
var (
// Version is provided by the makefile.
Version = "v0"
// Revision is a git tag provided by the makefile.
Revision = "0"
// Created is a date provided by the makefile.
Created = "0000-00-00"
)
// mainConfig is the master config for the executable.
type mainConfig struct { //nolint:govet // Don't care about alignment.
Framer framer.Config `yaml:"framer"`
Logger logger.Config `yaml:"logger"`
}
var currentConfig = mainConfig{ //nolint:gochecknoglobals // Static config
Framer: framer.ConfigDefault(),
Logger: logger.ConfigDefault(),
}
// initConfig initializes the config by calling config.Init() and handling
// the results. May exit the program if there is an error.
func initConfig() {
err := config.Init(configFileName, "", &currentConfig)
if err != nil {
// A missing config file is not fatal. Anything else is.
ncError := &config.NoConfigError{}
if !errors.As(err, &ncError) {
fmt.Println(err.Error()) //nolint:forbidigo // OK to print here.
os.Exit(-1)
}
}
log = logger.New(&currentConfig.Logger)
binName := filepath.Base(os.Args[0])
log.Info().Msg(fmt.Sprintf("%s %s rev:%s created:%s", binName, Version, Revision, Created))
log.Info().Interface("config", &currentConfig).Msg("effective config")
// If there was no config file, we log it here.
if err != nil {
log.Info().Msg(err.Error())
}
}

cmd/framer/main.go Normal file

@@ -0,0 +1,91 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package main
import (
"context"
"net"
"os"
"path/filepath"
"github.com/rs/zerolog"
"google.golang.org/grpc"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/service"
"github.com/TurbineOne/ffmpeg-framer/pkg/framer"
"github.com/TurbineOne/ffmpeg-framer/pkg/interrupt"
)
var log zerolog.Logger //nolint:gochecknoglobals // Don't care.
func main() {
initConfig() // May early exit if config init fails.
serviceSocket := filepath.Join(currentConfig.Framer.ServiceSocketRoot,
framer.SocketName)
if err := os.RemoveAll(serviceSocket); err != nil {
log.Error().Err(err).Msg("failed to remove existing socket")
}
l, err := net.Listen("unix", serviceSocket)
if err != nil {
log.Error().Err(err).Msg("failed to listen on socket")
return
}
defer func() {
_ = l.Close()
}()
ctx, cancel := context.WithCancel(context.Background())
go func() {
_ = interrupt.Run(ctx)
cancel()
}()
framer := framer.New(&currentConfig.Framer, &log)
if err := framer.Init(); err != nil {
log.Error().Err(err).Msg("failed to initialize framer")
return
}
opts := make([]grpc.ServerOption, 0)
server := grpc.NewServer(opts...)
service.RegisterFramerServer(server, framer)
go func() {
if err := framer.Run(ctx); err != nil {
log.Error().Err(err).Msg("framer error")
}
server.Stop()
}()
log.Info().Str("socket", serviceSocket).Msg("starting server")
err = server.Serve(l)
if err != nil {
log.Error().Err(err).Msg("gRPC server failed")
}
log.Info().Msg("server stopped")
cancel()
}

go.mod

@@ -3,8 +3,19 @@ module github.com/TurbineOne/ffmpeg-framer
go 1.23.0
require (
github.com/aofei/mimesniffer v1.2.1
github.com/asticode/go-astiav v0.7.2-0.20230216133717-17b4d5963cac // Pinned to work with ffmpeg 4.4.x from Ubuntu 22.04.
github.com/rs/zerolog v1.34.0
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6
google.golang.org/grpc v1.71.1
google.golang.org/protobuf v1.36.6
)
require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
golang.org/x/net v0.34.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/text v0.21.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect
)

go.sum Normal file

@@ -0,0 +1,64 @@
github.com/aofei/mimesniffer v1.2.1 h1:IMsdcpRp6cxmRywsOo3GlN1p5nwYdW/6kNK543/GYBg=
github.com/aofei/mimesniffer v1.2.1/go.mod h1:RdFvw/YnqGk4qKjvwV5N6SXc/Hr/VaX+eP1iabbqBKk=
github.com/asticode/go-astiav v0.7.2-0.20230216133717-17b4d5963cac h1:+dp1ZkQwdqME75EpkrltpxnVdD6bywxHbuh3bsTF+8I=
github.com/asticode/go-astiav v0.7.2-0.20230216133717-17b4d5963cac/go.mod h1:phvUnSSlV91S/PELeLkDisYiRLOssxWOsj4oDrqM/54=
github.com/asticode/go-astikit v0.28.2 h1:c2shjqarbZwcQGQ7GPfchG2sSOL/7NHGbdgHTx43RH8=
github.com/asticode/go-astikit v0.28.2/go.mod h1:h4ly7idim1tNhaVkdVBeXQZEE3L0xblP7fCWbgwipF0=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk=
go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 h1:y5zboxd6LQAqYIhHnB48p0ByQ/GnQx2BE33L8BOHQkI=
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f h1:OxYkA3wjPsZyBylwymxSHa7ViiW1Sml4ToBrncvFehI=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:+2Yz8+CLJbIfL9z73EW45avw8Lmge3xVElCP9zEKi50=
google.golang.org/grpc v1.71.1 h1:ffsFWr7ygTUscGPI0KKK6TLrGz0476KUvvsbqWK0rPI=
google.golang.org/grpc v1.71.1/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

pkg/config/config.go Normal file

@@ -0,0 +1,89 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package config
import (
"fmt"
"io"
"os"
"github.com/caarlos0/env/v6"
"gopkg.in/yaml.v3"
)
// NoConfigError indicates that we couldn't find a config file.
// This is usually OK and should be treated as a warning.
type NoConfigError struct {
Path string
}
func (e *NoConfigError) Error() string {
return "cannot find config file [" + e.Path + "], continuing with defaults"
}
// parseFile parses the config file at 'path' and overwrites defaults in 'out'.
func parseFile(path string, out interface{}) error {
f, err := os.Open(path)
if err != nil {
ncErr := &NoConfigError{path}
return ncErr
}
defer f.Close() //nolint:errcheck // Don't care about error
b, err := io.ReadAll(f)
if err != nil {
return fmt.Errorf("failed to read config file [%s]: %w", path, err)
}
err = yaml.Unmarshal(b, out)
if err != nil {
return fmt.Errorf("failed to parse config file [%s]: %w", path, err)
}
return nil
}
// parseEnv parses the environment and overwrites defaults in 'out'.
func parseEnv(envPrefix string, out interface{}) error {
envErr := env.Parse(out, env.Options{Prefix: envPrefix})
if envErr != nil {
return fmt.Errorf("config failed to parse environment: %w", envErr)
}
return nil
}
// Init initializes 'out' based on a config file and the environment.
// First it parses the environment variables. Then the YAML config file,
// overriding anything from the environment.
//
// The 'envPrefix' is prefixed to the names of any environment variables
// that we look for, so e.g., if 'envPrefix' is "APP_" and there's a struct
// tag saying $HTTP_PORT, the result will come from $APP_HTTP_PORT.
//
// If the returned error is NoConfigError, the caller may treat it as a
// warning and continue with defaults.
func Init(path string, envPrefix string, out interface{}) error {
// First, we parse the environment variables.
if err := parseEnv(envPrefix, out); err != nil {
return err
}
// Now we open, read, and parse the contents of the config file.
return parseFile(path, out)
}
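A usage sketch of this package, mirroring initConfig in cmd/framer/config.go above; the struct, defaults, and "FPS_" prefix are illustrative, with yaml tags read from the file and caarlos0/env tags read from the environment.

package example

import (
	"errors"
	"log"

	"github.com/TurbineOne/ffmpeg-framer/pkg/config"
)

// appConfig is a hypothetical config: defaults are overridden by
// $FPS_HTTP_PORT / $FPS_LOG_LEVEL, which are in turn overridden by config.yaml.
type appConfig struct {
	HTTPPort int    `env:"HTTP_PORT" yaml:"httpPort"`
	LogLevel string `env:"LOG_LEVEL" yaml:"logLevel"`
}

func loadConfig() appConfig {
	cfg := appConfig{HTTPPort: 8080, LogLevel: "info"} // defaults
	if err := config.Init("config.yaml", "FPS_", &cfg); err != nil {
		ncErr := &config.NoConfigError{}
		if !errors.As(err, &ncErr) {
			log.Fatal(err) // a bad file is fatal
		}
		log.Print(err) // a missing file is just a warning
	}
	return cfg
}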

pkg/framer/config.go Normal file

@@ -0,0 +1,40 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package framer
const (
SocketName = "framer.sock"
)
// Config configures the Framer.
type Config struct {
LogLevel string `yaml:"logLevel"`
FfmpegLogLevel string `yaml:"ffmpegLogLevel"`
HwDecoderEnable bool `yaml:"hwDecoderEnable"`
ServiceSocketRoot string `env:"SERVICE_SOCKET_ROOT" yaml:"serviceSocketRoot"`
}
// ConfigDefault is the default value for Config.
func ConfigDefault() Config {
return Config{
LogLevel: "<inherit>",
FfmpegLogLevel: "verbose", // "debug"
HwDecoderEnable: true,
ServiceSocketRoot: "/service-socket",
}
}
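Given these defaults and the mainConfig wrapper in cmd/framer/config.go above, a config.yaml overriding a couple of them might look like the following sketch (the logger keys live in pkg/logger, which is not shown in this commit excerpt):

framer:
  ffmpegLogLevel: warning
  hwDecoderEnable: false
  serviceSocketRoot: /service-socket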

pkg/framer/dec_stream.go Normal file

@@ -0,0 +1,310 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package framer
import (
"container/ring"
"errors"
"fmt"
"github.com/asticode/go-astiav"
"github.com/rs/zerolog"
)
var codecIDToHwDecoder = map[astiav.CodecID]string{
astiav.CodecIDH264: "h264_cuvid",
astiav.CodecIDHevc: "hevc_cuvid",
astiav.CodecIDMpeg2Video: "mpeg2_cuvid",
astiav.CodecIDMpeg4: "mpeg4_cuvid",
astiav.CodecIDVc1: "vc1_cuvid",
astiav.CodecIDVp8: "vp8_cuvid",
astiav.CodecIDVp9: "vp9_cuvid",
}
type SkippedCodecError struct {
CodecID astiav.CodecID
}
func (e SkippedCodecError) Error() string {
return fmt.Sprintf("skipping decoder, returning raw data for: %s", e.CodecID.Name())
}
// decStream wraps a single input stream with the state for decoding and
// distributing frames.
type decStream struct {
// InFFmpegStream is exported so it can be referenced when setting up
// corresponding output streams.
InFFmpegStream *astiav.Stream
// TimeBase is the time base of the decoder.
TimeBase astiav.Rational
// If decCodecContext is nil, this stream is a no-op for an unsupported
// encoding and just passes packetWrappers through. If not nil, it's a
// decoding stream returning frameWrappers.
decCodecContext *astiav.CodecContext
// inPkt is the packet we read from the input stream to decode.
inPkt *astiav.Packet
// outFW is the frameWrapper we're currently decoding into.
outFW *frameWrapper
// We use a ring buffer of frameWrappers to distribute data.
frameWrappers *ring.Ring
FrameCount int
}
// newDecStream returns a new decStream instance. Must be closed with Close() when done.
func newDecStream() *decStream {
	// Three wrappers seem like enough for taps to process one frame while the
	// stream distributes the next and invalidates the third for the next input,
	// avoiding missed frames in the worst case for lock-step. However, AAC seems
	// to come in multi-frame packets, and 4 helps prevent drops.
const numWrappers = 4
return &decStream{
inPkt: astiav.AllocPacket(),
frameWrappers: ring.New(numWrappers),
}
}
func (st *decStream) MarshalZerologObject(e *zerolog.Event) {
if st.decCodecContext == nil {
return
}
e.Str(lCodec, st.decCodecContext.CodecID().Name()).
Int(lTimeBaseNum, st.TimeBase.Num()).
Int(lTimeBaseDen, st.TimeBase.Den()).
Int(lFrameRateNum, st.decCodecContext.Framerate().Num()).
Int(lFrameRateDen, st.decCodecContext.Framerate().Den()).
Int(lIndex, st.InFFmpegStream.Index())
}
// Close frees ffmpeg resources associated with the stream.
func (st *decStream) Close() {
if st.decCodecContext != nil {
// Best practice is to send a nil packet to flush the decoder.
_ = st.decCodecContext.SendPacket(nil)
fw := st.nextFrameWrapper() // need a dummy frame into which we can receive.
fw.SetValid(false)
var err error
for err == nil {
err = st.decCodecContext.ReceiveFrame(fw.Frame)
}
st.decCodecContext.Free()
}
st.inPkt.Free()
st.frameWrappers.Do(func(v interface{}) {
if v != nil {
v.(*frameWrapper).Close() //nolint:forcetypeassert // It's a frameWrapper.
}
})
}
// initFrameDecoder initializes the stream to decode frames from the input
// stream and wrap them in frameWrappers.
func (st *decStream) initFrameDecoder(inputFormatContext *astiav.FormatContext,
input *astiav.Stream, decCodec *astiav.Codec, rawURL string,
) {
var err error
defer func() {
if err != nil {
log.Info().Int(lIndex, input.Index()).Str(lCodec, decCodec.Name()).Err(err).Msg("")
st.decCodecContext.Free()
st.decCodecContext = nil
}
}()
codecParams := input.CodecParameters()
mediaType := codecParams.MediaType()
decCodecID := codecParams.CodecID()
st.decCodecContext = astiav.AllocCodecContext(decCodec)
_ = codecParams.ToCodecContext(st.decCodecContext)
switch mediaType {
case astiav.MediaTypeAudio:
if st.decCodecContext.SampleRate() == 0 {
const guessSampleRate = 48000
log.Info().Int(lIndex, input.Index()).Str(lCodec, decCodec.Name()).
Msg("guessing sample rate for audio stream")
st.decCodecContext.SetSampleRate(guessSampleRate)
}
if st.decCodecContext.ChannelLayout() == 0 {
log.Info().Int(lIndex, input.Index()).Str(lCodec, decCodec.Name()).
Msg("guessing channel layout for audio stream")
st.decCodecContext.SetChannelLayout(astiav.ChannelLayoutStereo)
}
case astiav.MediaTypeVideo:
st.decCodecContext.SetFramerate(inputFormatContext.GuessFrameRate(input, nil))
case astiav.MediaTypeSubtitle:
// This decoder crashes during decCodecContext.SendPacket() with an internal
// assertion failure that seems like it could be an ffmpeg bug. Bypassing for now.
if decCodecID == astiav.CodecIDDvbSubtitle {
err = &SkippedCodecError{CodecID: decCodecID}
return
}
}
if err = st.decCodecContext.Open(decCodec, nil); err != nil {
err = fmt.Errorf("opening decoder context failed: %w", err)
return
}
st.TimeBase = st.decCodecContext.TimeBase()
for i := 0; i < st.frameWrappers.Len(); i++ {
st.frameWrappers.Value = newFrameWrapper(mediaType, input.Index(), decCodecID.Name())
st.frameWrappers = st.frameWrappers.Next()
}
st.frameWrappers.Do(func(v interface{}) {
//nolint:forcetypeassert // We know.
fwErr := v.(*frameWrapper).Init(st.decCodecContext, rawURL)
// We just remember the first error for the outer layer to return.
if fwErr != nil && err == nil {
err = fmt.Errorf("initializing frame wrapper(s) failed: %w", fwErr)
}
})
st.outFW = st.nextFrameWrapper()
}
// Init initializes the stream. It uses the inputFormatContext to guess the
// framerate of the stream, if it's a video stream. If there are initialization
// errors, the stream falls back to being a passthrough stream rather than
// reporting them; Init has no return value.
func (st *decStream) Init(inputFormatContext *astiav.FormatContext, input *astiav.Stream, rawURL string, hwAccel bool) {
st.InFFmpegStream = input
st.TimeBase = input.TimeBase() // Default, but if we open the decoder, this will be updated.
var decCodec *astiav.Codec
if decName, ok := codecIDToHwDecoder[input.CodecParameters().CodecID()]; ok && hwAccel {
log.Debug().Int(lIndex, input.Index()).Str(lCodec, input.CodecParameters().CodecID().Name()).
Str(lDecoder, decName).Msg("using hardware decoder")
decCodec = astiav.FindDecoderByName(decName)
}
if decCodec == nil {
decCodec = astiav.FindDecoder(input.CodecParameters().CodecID())
}
if decCodec == nil {
		// This decStream will just pass through raw packets from the input stream.
log.Debug().Int(lIndex, input.Index()).Str(lCodec, input.CodecParameters().CodecID().Name()).
Msg("no decoder found, falling back to passthrough")
return
}
st.initFrameDecoder(inputFormatContext, input, decCodec, rawURL)
}
// nextFrameWrapper returns the next wrapper in this stream's ring buffer.
func (st *decStream) nextFrameWrapper() *frameWrapper {
st.frameWrappers = st.frameWrappers.Next()
return st.frameWrappers.Value.(*frameWrapper) //nolint:forcetypeassert // It's a wrapper.
}
// Decode converts a single packet into a set of wrappers, decoding if needed.
func (st *decStream) Decode(pw *packetWrapper) ([]wrapper, error) {
// If no decoder, this stream is a no-op.
if st.decCodecContext == nil {
wrappers := []wrapper{pw}
return wrappers, nil
}
if err := pw.Unwrap(st.inPkt); err != nil {
return nil, err
}
st.inPkt.RescaleTs(st.InFFmpegStream.TimeBase(), st.decCodecContext.TimeBase())
if err := st.decCodecContext.SendPacket(st.inPkt); err != nil {
return nil, fmt.Errorf("sending packet to decoder failed: %w", err)
}
return st.receiveFrames()
}
// receiveFrames receives all frames from the decoder and returns them.
func (st *decStream) receiveFrames() ([]wrapper, error) {
wrappers := make([]wrapper, 0, 1)
// Technically, one packet could expand into multiple frames, so we query
// the decoder in a loop until it returns an error.
	// If multiple frames actually happen with any regularity, we should increase
// the size of the ring buffer.
for {
		// We're about to overwrite this wrapper's ffmpeg buffer.
// If there's a tap still holding this wrapper, setting it invalid
// ensures that fw.ToModelMedia() will not try to access the wrapped frame.
st.outFW.SetValid(false)
// ReceiveFrame() will release any previous buffers in fw.Frame.
if err := st.decCodecContext.ReceiveFrame(st.outFW.Frame); err != nil {
if errors.Is(err, astiav.ErrEof) || errors.Is(err, astiav.ErrEagain) {
st.FrameCount += len(wrappers)
if len(wrappers) > st.frameWrappers.Len() {
log.Info().Int(lFrameCount, len(wrappers)).
Int(lIndex, st.InFFmpegStream.Index()).
Msg("frame decode count exceeded ring buffer size")
}
return wrappers, nil
}
return nil, fmt.Errorf("receiving frame from decoder failed: %w", err)
}
// Now this fw has a valid frame and can be sent to taps.
st.outFW.SetValid(true)
wrappers = append(wrappers, st.outFW)
st.outFW = st.nextFrameWrapper()
}
}
// DecodeFlush flushes the decoder and returns any remaining frames.
func (st *decStream) DecodeFlush() ([]wrapper, error) {
if st.decCodecContext == nil {
return nil, nil
}
// Best practice is to send a nil packet to flush the decoder.
_ = st.decCodecContext.SendPacket(nil)
return st.receiveFrames()
}

pkg/framer/ffmpeg_logger.go Normal file

@@ -0,0 +1,129 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package framer
import (
"strings"
"github.com/asticode/go-astiav"
"github.com/rs/zerolog"
)
var ffmpegLog zerolog.Logger
// ffmpegToZerologLevel maps ffmpeg's internal log levels to zerolog's.
// This map is queried on every log invocation.
var ffmpegToZerologLevel = map[astiav.LogLevel]zerolog.Level{
astiav.LogLevelQuiet: zerolog.Disabled,
astiav.LogLevelPanic: zerolog.PanicLevel,
astiav.LogLevelFatal: zerolog.FatalLevel,
astiav.LogLevelError: zerolog.ErrorLevel,
astiav.LogLevelWarning: zerolog.WarnLevel,
astiav.LogLevelInfo: zerolog.InfoLevel,
astiav.LogLevelVerbose: zerolog.DebugLevel, // FFmpeg's verbose is more like zerolog's debug...
astiav.LogLevelDebug: zerolog.TraceLevel, // because ffmpeg's debug is more like zerolog's trace.
}
// nameToFfmpegLogLevel maps ffmpeg's log level names to their internal values.
// We have to use ffmpeg's native levels because there are more of them than
// there are zerolog levels, so the mapping isn't 1:1.
// We only use this for config translation at startup.
var nameToFfmpegLogLevel = map[string]astiav.LogLevel{
"quiet": astiav.LogLevelQuiet,
"panic": astiav.LogLevelPanic,
"fatal": astiav.LogLevelFatal,
"error": astiav.LogLevelError,
"warning": astiav.LogLevelWarning,
"info": astiav.LogLevelInfo,
"verbose": astiav.LogLevelVerbose,
"debug": astiav.LogLevelDebug,
}
// squelchedFfmpegLogPrefixes is a list of prefixes for ffmpeg log messages
// that we want to squelch. Sometimes ffmpeg logs the same message over and over
// for a stream, and it can do it so frequently that we become I/O blocked on logs.
var squelchedFfmpegLogPrefixes = []string{
"PES packet size",
"Packet corrupt",
"Invalid level prefix",
"error while decoding MB",
"more samples than frame size",
"deprecated pixel format used",
}
// How many times we saw each message. This is deliberately not atomic:
// the overhead isn't worth it and miscounts are inconsequential.
var squelchedFfmpegLogCounts = make([]int, len(squelchedFfmpegLogPrefixes))
const (
squelchedLogInterval = 1024 // log every Nth message
lSquelch = "squelch count"
)
func ffmpegLogCallback(l astiav.LogLevel, fmt, msg, parent string) {
	// FFmpeg sometimes logs a single "." to indicate progress. We just ignore it.
if msg == ".\n" {
return
}
var (
squelch bool
i int
prefix string
)
for i, prefix = range squelchedFfmpegLogPrefixes {
if strings.HasPrefix(msg, prefix) {
squelch = true
squelchedFfmpegLogCounts[i]++
if squelchedFfmpegLogCounts[i]%squelchedLogInterval != 1 {
return
}
break
}
}
zl, ok := ffmpegToZerologLevel[l]
if !ok {
zl = zerolog.ErrorLevel // If it's not in the map, at least we'll log it.
}
msg = strings.TrimSuffix(msg, "\n")
event := ffmpegLog.WithLevel(zl)
if squelch {
event = event.Int(lSquelch, squelchedFfmpegLogCounts[i])
}
event.Msg(msg)
}
func ffmpegLoggerSetup(config *Config) {
ffmpegLog = log.With().Str("pkg", "ffmpeg").Logger()
ffmpegLogLevel, ok := nameToFfmpegLogLevel[config.FfmpegLogLevel]
if !ok {
panic("invalid ffmpeg log level: " + config.FfmpegLogLevel)
}
// FFmpeg logs get doubly filtered. First we set the ffmpeg-specific level:
astiav.SetLogLevel(ffmpegLogLevel)
// The FFmpeg logs then feed through the Framer's own log level filter.
astiav.SetLogCallback(ffmpegLogCallback)
}

pkg/framer/frame.go Normal file

@@ -0,0 +1,208 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//nolint:wrapcheck // gRPC calls should return status.Error.
package framer
import (
"context"
"errors"
"io"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/service"
)
const grpcErrorFormat = "%s"
// rewriteError changes certain errors to be more appropriate for a gRPC client.
func rewriteError(ctx context.Context, err error) error {
// EOF is expected for files, and returning a nil causes gRPC to return
// a simple, unwrapped io.EOF to the client.
if errors.Is(err, io.EOF) {
return nil
}
// This is normal for a client cancellation.
if ctx.Err() != nil {
return nil //nolint:nilerr // Intentional.
}
// If it's not a gRPC status error, we upgrade it to look like one.
if _, ok := status.FromError(err); !ok {
return status.Errorf(codes.Aborted, grpcErrorFormat, err.Error())
}
return err
}
// serviceNextFrames is the steady-state loop for the Frame() gRPC call.
func (f *Framer) serviceNextFrames(grpcStream service.Framer_FrameServer, frameTap *frameTap,
) (frameCounts []int, err error) {
frameCounts = make([]int, frameTap.maxStreamIndex+1)
defer func() {
err = rewriteError(grpcStream.Context(), err)
}()
for {
var req *service.FrameRequest
if req, err = grpcStream.Recv(); err != nil {
return frameCounts, err
}
if req.GetNextFrames() == nil {
err = status.Error(codes.InvalidArgument, "expected FrameRequest.NextFrames")
return frameCounts, err
}
var medias []*model.Media
if medias, err = frameTap.GetModelMedias(grpcStream.Context(), req.GetNextFrames().GetPoll()); err != nil {
return frameCounts, err
}
for _, media := range medias {
frameCounts[media.GetKey().GetSliceSpec().GetStreamIndex()]++
}
if err = grpcStream.Send(&service.FrameResponse{
NextFrames: &service.FrameResponse_NextFrames{
Medias: medias,
},
}); err != nil {
return frameCounts, err
}
}
}
// Frame implements the gRPC Framer service.
//
//nolint:funlen // Not worth breaking up.
func (f *Framer) Frame(grpcStream service.Framer_FrameServer) error {
var (
rawURL string
frameCounts []int
err error
)
defer func() {
log.Info().Ints(lFrameCount, frameCounts).Err(err).Str(lURL, rawURL).Msg("Frame() exiting")
}()
// First request MUST be a Start.
req, err := grpcStream.Recv()
if err != nil {
return status.Error(codes.Canceled, err.Error())
}
startReq := req.GetStart()
if startReq == nil {
err = status.Error(codes.InvalidArgument, "expected FrameRequest.Start")
return err
}
rawURL = startReq.GetUrl()
log.Info().Str(lURL, rawURL).Msg("Frame() start")
source, err := f.getSource(grpcStream.Context(), rawURL, startReq.GetFileFraming(),
startReq.GetIsLive(), req.GetSeek())
if err != nil {
err = status.Errorf(codes.NotFound, grpcErrorFormat, err.Error())
return err
}
defer source.Unref()
setupErr := source.SetupErr()
if setupErr != nil {
return status.Errorf(codes.InvalidArgument, grpcErrorFormat, setupErr.Error())
}
startResp := &service.FrameResponse{
Start: &service.FrameResponse_Start{
MediaInfo: source.MediaInfo(),
},
}
if err = grpcStream.Send(startResp); err != nil {
err = status.Error(codes.Canceled, err.Error())
return err
}
// Second request MUST be a Tap to describe the streams to be tapped.
req, err = grpcStream.Recv()
if err != nil {
err = status.Error(codes.Canceled, err.Error())
return err
}
tapReq := req.GetTap()
if tapReq == nil {
err = status.Error(codes.InvalidArgument, "expected FrameRequest.Tap")
return err
}
if seekReq := req.GetSeek(); seekReq != nil {
if err = source.Seek(seekReq.GetTime().AsDuration(), seekReq.GetClosestKeyframe()); err != nil {
err = status.Errorf(codes.InvalidArgument, grpcErrorFormat, err.Error())
return err
}
}
frameTap := newFrameTap(source)
if err = frameTap.Init(tapReq.StreamIndices, int(tapReq.GetSample().GetFrameInterval()),
tapReq.GetSample().GetTimeInterval().AsDuration(), tapReq.OutputPath); err != nil {
err = status.Errorf(codes.Internal, grpcErrorFormat, err.Error())
return err
}
if err = source.AddTap(frameTap, tapLayerDecoder); err != nil {
err = status.Errorf(codes.NotFound, grpcErrorFormat, err.Error())
return err
}
defer source.RemoveTap(frameTap)
log.Debug().Str(lURL, rawURL).Msg("Frame() tap ok")
if err = grpcStream.Send(&service.FrameResponse{
Tap: &service.FrameResponse_Tap{},
}); err != nil {
err = status.Error(codes.Canceled, err.Error())
return err
}
// From here on out, all we expect are NextFrames requests.
frameCounts, err = f.serviceNextFrames(grpcStream, frameTap)
return err
}

pkg/framer/frame_tap.go Normal file

@@ -0,0 +1,625 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package framer
import (
"context"
"fmt"
"io"
"math/rand"
"os"
"path/filepath"
"reflect"
"sync"
"time"
"github.com/asticode/go-astiav"
"github.com/rs/zerolog"
"golang.org/x/exp/slices"
"google.golang.org/protobuf/proto"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)
type noTappedStreamsError struct{}
func (*noTappedStreamsError) Error() string {
return "no tapped streams"
}
type unsupportedMediaFormatError struct {
format interface{}
}
func (e *unsupportedMediaFormatError) Error() string {
return fmt.Sprintf("unsupported media format: %T", e.format)
}
type duplicateOutputFileError struct {
stat os.FileInfo
}
func (e *duplicateOutputFileError) Error() string {
return fmt.Sprintf("media would overwrite existing output file: %+v", e.stat)
}
// frameTapStream represents a single stream within the frame tap.
type frameTapStream struct {
frameC chan wrapper
// State for sub-sampling frames on primary streams:
timeBase astiav.Rational // can be different per-stream, hence ptsInterval is per-stream
ptsInterval int64 // minimum presentation time stamp (PTS) between sending frames
prevPTS int64 // previous sent frame's PTS
frameSkips int // When this reaches frameInterval, we send a frame on this stream.
// State for detecting reused frames on receive side:
prevRxOffset time.Duration // previously received frame offset
}
func (fts *frameTapStream) MarshalZerologObject(e *zerolog.Event) {
e.Bool(lRequested, fts.frameC != nil).
Int64(lPTSInterval, fts.ptsInterval)
}
// frameTap represents a model.Media output for a source. It is the result
// of a Start() request. Subsequent requests for frames consume from the
// slice of channels embedded in the frameTap, one channel per stream.
// When the client is requesting frames faster than they're coming in,
// the client is blocked on this set of channels. When the source is generating
// frames faster than the client, the tap drops older frames in favor of newer
// ones, preventing stale frames from being seen by the client. So each channel
// acts like a 1-deep, head-drop buffer.
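//
// A lossy Send() therefore reduces to the classic 1-deep head-drop pattern
// (compare Send() below):
//
//	select {
//	case <-stream.frameC: // discard the stale frame, counting it as dropped
//	default:
//	}
//	stream.frameC <- w // cannot block: capacity 1, just emptied, single sender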
type frameTap struct {
source *source
streams map[int]*frameTapStream
outputPath string
outputFilePrefix string // unique to this tap
maxStreamIndex int
framesIn []int32
framesDropped []int32
// Init() sets these:
frameInterval int // how many frames read in per sampled frame sent out
// selectCases is all stream.frameC channels, plus a spare for a ctx, added at runtime.
// Case index may not match stream index because requested streams is sparse,
// and this slice must be dense for reflect.Select().
selectCases []reflect.SelectCase
closeOnce sync.Once
closeErrC chan error
}
// newFrameTap creates a new frame tap for the given source.
// The resulting tap must be initialized before adding to the source.
func newFrameTap(s *source) *frameTap {
mediaInfo := s.MediaInfo()
t := &frameTap{
source: s,
streams: make(map[int]*frameTapStream),
closeErrC: make(chan error, 1),
}
decStreams := s.DecStreams()
for i32 := range mediaInfo.Streams {
i := int(i32)
var tb astiav.Rational
if decStreams[i] != nil {
tb = decStreams[i].TimeBase
}
t.streams[i] = &frameTapStream{
timeBase: tb,
}
if i > t.maxStreamIndex {
t.maxStreamIndex = i
}
}
t.framesIn = make([]int32, t.maxStreamIndex+1)
t.framesDropped = make([]int32, t.maxStreamIndex+1)
return t
}
// randString generates a random string of length n.
// Characters are compatible with file names.
func randString(n int) string {
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
b := make([]byte, n)
for i := range b {
b[i] = letterBytes[rand.Intn(len(letterBytes))]
}
return string(b)
}
// Init initializes the frameTap with the given parameters.
func (t *frameTap) Init(tappedStreams []int32, frameInterval int,
timeInterval time.Duration, outputPath string,
) error {
t.frameInterval = frameInterval
t.outputPath = outputPath
const randomizerLength = 6
// Any truncation will start cutting at the front of the string, so we add
// our unique random string toward the end.
t.outputFilePrefix = t.source.outFilePrefix + "-" + randString(randomizerLength)
logTapStreams := zerolog.Arr()
tappedSomething := false
for i, stream := range t.streams {
stream.ptsInterval = durationToPts(timeInterval, stream.timeBase)
// We init prevPTS and frameSkips such that the first frame is always sent.
stream.prevPTS = -stream.ptsInterval - 1 // -1 prevents a 0 PTS from looking like a duplicate
stream.frameSkips = frameInterval
if slices.Contains(tappedStreams, int32(i)) {
tappedSomething = true
stream.frameC = make(chan wrapper, 1)
t.selectCases = append(t.selectCases,
reflect.SelectCase{Dir: reflect.SelectRecv, Chan: reflect.ValueOf(stream.frameC)})
}
logTapStreams.Object(stream)
}
if !tappedSomething {
return &noTappedStreamsError{}
}
// We add a spare case to hold a ctx channel, which must be added at runtime.
t.selectCases = append(t.selectCases, reflect.SelectCase{})
log.Info().Str(lURL, t.source.url).Array(lTapStreams, logTapStreams).
Int(lFrameInterval, t.frameInterval).Dur(lTimeInterval, timeInterval).
Msg("frame tap init")
return nil
}
// DrainUntilClosed consumes and discards tap frames until the tap is closed.
// This is called in the context of the consumer when it's done with the tap
// and requesting its removal (though normally called by the source's RemoveTap()
// method). If the source is currently blocked on this tap, draining releases it,
// allowing the source to finish a pending Send() and unlock the taps mutex so
// removal can complete.
func (t *frameTap) DrainUntilClosed() {
for _, s := range t.streams {
if s.frameC != nil {
go func(c <-chan wrapper) {
for range c {
// Drain.
}
}(s.frameC)
}
}
}
// Close is called by the source and closes the channels, alerting the consumer
// that the tap is done.
func (t *frameTap) Close(err error) {
t.closeOnce.Do(func() {
t.closeErrC <- err
close(t.closeErrC)
for _, s := range t.streams {
if s.frameC != nil {
close(s.frameC)
}
}
log.Info().Err(err).Str(lURL, t.source.url).
Ints32(lFrameCount, t.framesIn).Ints32(lFrameDropCount, t.framesDropped).
Msg("frame tap closed")
})
}
// subSample returns true if the frame should be sent to the client.
func (t *frameTap) subSample(stream *frameTapStream, w wrapper) bool {
pts := w.PTS()
// We require timestamps to be unique, so we always drop frames that have the
// same PTS as the previous frame emitted on this stream.
if stream.prevPTS == pts {
log.Debug().Int(lIndex, w.StreamIndex()).Int64(lPTS, pts).
Int(lTimeBaseNum, stream.timeBase.Num()).Int(lTimeBaseDen, stream.timeBase.Den()).
Str(lURL, t.source.url).Msg("dropping frame with duplicate PTS")
return false
}
// If we're time sampling, we drop frames that are too close to the previous frame in time.
if stream.ptsInterval != 0 && pts-stream.prevPTS < stream.ptsInterval {
return false
}
// If we're frame sampling, we drop all but every n'th frame.
if t.frameInterval != 0 && stream.frameSkips < t.frameInterval {
stream.frameSkips++
return false
}
stream.frameSkips = 0
stream.prevPTS = pts
return true
}
// Send is called by the source to send data to the tap.
// This may only be called by the source until the tap is removed from the source.
// This is not thread-safe because it assumes we can empty and re-fill the
// channel without blocking. But it's only called by the source, which is
// single-threaded.
func (t *frameTap) Send(ctx context.Context, w wrapper, lossy bool) {
streamIndex := w.StreamIndex()
stream, ok := t.streams[streamIndex]
if !ok {
log.Info().Int(lIndex, streamIndex).Msg("stream index not in tap channels")
return
}
t.framesIn[streamIndex]++
if !t.subSample(stream, w) {
// Intentionally sub-sampling isn't counted as a "drop" in the stats.
return
}
if stream.frameC != nil {
// If this is lossy, we clear any stale data out of the channel,
// if still there. That makes sure the channel send won't block, and the
// channel receive will only see the very latest frame.
if lossy {
select {
case <-stream.frameC:
t.framesDropped[streamIndex]++
default:
}
}
// Now we buffer the data into the channel, potentially blocking if !lossy.
stream.frameC <- w
}
}
// wrappersToModelMedias converts a slice of wrappers to a slice of model.Medias
// by calling ToModelMedia() on each. It skips any nil wrappers in the slice,
// as these streams were either not present or not requested by the client.
// It returns nil if nothing was successfully encoded. This can happen if
// one of the wrappers fails encoding, or if we were late encoding it and the
// internal media was already reclaimed. It's also possible to return a slice
// containing valid medias, but not one from a "primary" stream.
func (t *frameTap) wrappersToModelMedias(wrappers []wrapper) []*model.Media {
mms := make([]*model.Media, 0, len(wrappers))
gotValidMedia := false
for i, w := range wrappers {
if w == nil {
continue
}
mm := w.ToModelMedia()
if mm == nil {
// A nil mm means we were late encoding the frame and it was already
// reclaimed, or there was some other error encoding it.
log.Debug().Int(lIndex, i).Msg("lost frame")
// TODO: Probably should t.framesDropped++ here, but we're async so
// that would need a mutex.
continue
}
// Race condition: It's possible that we receive a frame/packet wrapper from
// the stream.frameC, but the wrapper gets reused before we can encode it here.
// Then the same wrapper gets sent back to frameC, resulting in us seeing
// the same wrapper here twice in a row. We can detect this by checking the
// offset of the slice spec, which should be unique. This only happens on
// streams where the sender is running lossy and we get a burst of ~3
// frames for the same stream back-to-back before the next request.
// If it happens, we just drop the second copy.
offset := mm.GetKey().GetSliceSpec().GetOffset().AsDuration()
si := w.StreamIndex()
if t.streams[si].prevRxOffset != 0 && offset == t.streams[si].prevRxOffset {
log.Debug().Interface("key", mm.GetKey()).Int64(lPTS, w.PTS()).Str("prefix", t.outputFilePrefix).
Msg("tap rx duplicate offset, likely reused wrapper")
continue
}
t.streams[si].prevRxOffset = offset
// We're reaching into t.source to grab mediaInfo, but this is safe because
// it's only touched during setup, so by the time we're grabbing frames,
// we don't need to check the lock.
mm.ContainerInfo = t.source.mediaInfo
mms = append(mms, mm)
gotValidMedia = true
}
if !gotValidMedia {
return nil
}
return mms
}
// nonblockingRx is a non-blocking poll of the tap's channels. It modifies the
// backing store of wrappers in place, adding any frames that are available.
// Returns the number of frames received.
func (t *frameTap) nonblockingRx(wrappers []wrapper) (count int) {
for i, s := range t.streams {
if s.frameC == nil {
continue
}
select {
case w, ok := <-s.frameC:
if !ok {
// This stream is now closed, but we may still need to drain others.
break
}
wrappers[i] = w
count++
default:
}
}
return count
}
// blockingRx blocks on all the channels in the tap until one of them has data
// or all of them have closed. It modifies the backing store of wrappers in place,
// adding any frames that are available.
func (t *frameTap) blockingRx(ctx context.Context, wrappers []wrapper) error {
// The rest of t.selectCases are already set to the channels we care about.
// We overwrite the last case in the slice to be current ctx channel.
t.selectCases[len(t.selectCases)-1] = reflect.SelectCase{
Dir: reflect.SelectRecv,
Chan: reflect.ValueOf(ctx.Done()),
}
for {
if len(t.selectCases) == 1 {
// All the channels have closed, so there should be an error waiting for us.
err := <-t.closeErrC
if err == nil {
err = io.EOF // Caller expects a non-nil error to denote closure.
}
return err
}
// We don't yet have a frame. Block on all channels:
i, value, ok := reflect.Select(t.selectCases)
// The only value of i that tells us anything is the last one, which is the ctx.
// This i may not be the same as the stream index.
if i == len(t.selectCases)-1 {
// The selected channel was the ctx.
return ctx.Err()
}
if !ok {
// Any other channel closure signals that a stream has closed.
// However, others may still have data for us, so we pull this one out
// of the list and try again.
t.selectCases = slices.Delete(t.selectCases, i, i+1)
continue
}
w := value.Interface().(wrapper) //nolint:forcetypeassert // We know.
wrappers[w.StreamIndex()] = w
return nil
}
}
// formatDurationSortable formats a time.Duration as a fixed-width sortable string
// up to double-digit hours and nanosecond precision in a way that is compatible
// with filenames.
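// For example, 1h2m3s plus 4ns formats as "01h02m03.000000004s"; negative
// durations get a leading "-".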
func formatDurationSortable(d time.Duration) string {
sign := ""
if d < 0 {
sign = "-"
d = -d
}
hours := d / time.Hour
d %= time.Hour
minutes := d / time.Minute
d %= time.Minute
seconds := d / time.Second
d %= time.Second
nanos := d
return fmt.Sprintf("%s%02dh%02dm%02d.%09ds", sign, hours, minutes, seconds, nanos)
}
// mediaOutputFileName generates a filename for a media output file based on the
// media's source stream index and time offset.
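// For example, prefix "cam-x7Qz9a", stream index 2, a 90s offset, and a MIME
// type mapping to "jpg" would yield "cam-x7Qz9a-2-00h01m30.000000000s.jpg".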
func mediaOutputFileName(media *model.Media, prefix string) string {
// We guess at a file extension. If mime doesn't work, we try the encoding.
ext, ok := mimeTypeToExtension[media.GetInfo().GetType()]
if !ok {
ext = media.GetInfo().GetCodec()
if ext == "none" || ext == "" {
ext = "bin"
}
}
sliceSpec := media.GetKey().GetSliceSpec()
name := fmt.Sprintf("%s-%d-%s.%s", prefix, sliceSpec.GetStreamIndex(),
formatDurationSortable(sliceSpec.GetOffset().AsDuration()), ext)
// If we exceed the max filename length, we truncate toward the end, which
// is more likely to be unique and has the right extension.
const maxNameLen = 255
if len(name) > maxNameLen {
name = name[len(name)-maxNameLen:]
}
return name
}
// protoShallowCopy makes a shallow copy of a proto.Message.
// It only copies the first layer of the message, unlike proto.Clone().
func protoShallowCopy(original proto.Message) proto.Message {
if original == nil {
return nil
}
// We first make a new instance of the same type as the original.
originalValue := reflect.ValueOf(original).Elem()
copyValue := reflect.New(originalValue.Type()).Elem()
// Then copy all the fields that are settable.
for i := 0; i < originalValue.NumField(); i++ {
field := originalValue.Field(i)
if field.CanSet() {
copyValue.Field(i).Set(field)
}
}
return copyValue.Addr().Interface().(proto.Message) //nolint:forcetypeassert // See declaration.
}
// mediaToFile writes 'media's data to a file in 'outDir' with a name
// based on the media's source stream index and time offset.
// Returns an updated copy of the media object.
//
// TODO(casey): This approach is just to bootstrap the file output feature.
// We could consider moving this closer to the `encodeMedia` method,
// where we might be able to avoid some data copying.
func mediaToFile(media *model.Media, outDir, prefix string) (*model.Media, error) {
// If this isn't an inline bytes frame, we leave it as-is.
// TODO(casey): When the datalake is ready to handle raw bytes, we could
// be smarter and hand back inline bytes for small or zero-byte frames.
// Right now, everything must come back as a file, even if it's empty.
formatMB, ok := media.GetFormat().(*model.Media_MediaBytes)
if !ok {
return nil, &unsupportedMediaFormatError{media.GetFormat()}
}
name := filepath.Join(outDir, mediaOutputFileName(media, prefix))
stat, err := os.Stat(name)
if err == nil {
return nil, &duplicateOutputFileError{stat}
}
f, err := os.Create(name)
if err != nil {
// This seems to be happening in the wild, so here's some
// extra debug info to help us figure out why. We can remove this later.
dirEntries, dirErr := os.ReadDir(outDir)
if dirErr != nil {
return nil, fmt.Errorf("failed to create media file, dir failed: %w", dirErr)
}
return nil, fmt.Errorf("failed to create media file, dir entries: %d error: %w", len(dirEntries), err)
}
defer func() { _ = f.Close() }()
_, err = f.Write(formatMB.MediaBytes.GetData())
if err != nil {
return nil, fmt.Errorf("failed to write media output file: %w", err)
}
// The Wrapper manages a single instance of the media proto, and modifying it
// directly would break other taps on the same live stream. So we make a copy.
// We only need a shallow copy since we're only modifying the Format field.
//
// TODO(casey): I don't like this, but it works for now. If we moved
// file conversion up to the wrapper, we *might* be able to avoid this, but
// even then, the contract is that the requester has to delete the resulting files,
// so we'd have to make copies of files for each. Maybe we should have the
// datalake front the multiple async streamers case, and simplify framer
// by removing the whole shared taps feature?
mediaCopy := protoShallowCopy(media).(*model.Media) //nolint:forcetypeassert // We know.
mediaCopy.Format = &model.Media_MediaPath{
MediaPath: &model.MediaPath{
Path: f.Name(),
},
}
return mediaCopy, nil
}
// GetModelMedias returns a slice of model.Medias from t. If `poll` is true,
// it returns immediately, even if no medias are available. Otherwise, it
// blocks until at least one media is available.
// The returned error is either a context err or the disposition of the source.
func (t *frameTap) GetModelMedias(ctx context.Context, poll bool) ([]*model.Media, error) {
var medias []*model.Media
fws := make([]wrapper, t.maxStreamIndex+1)
// For efficiency, we first try to pull once from each channel, non-blocking.
if count := t.nonblockingRx(fws); count > 0 {
medias = t.wrappersToModelMedias(fws)
}
if medias == nil && poll {
// Nothing yet, return an empty slice.
return []*model.Media{}, nil
} else if medias == nil {
for medias == nil { // Loop until encoding succeeds for at least one media.
// fws holds results from each channel. We swap new frames into here
// until we get one that we care about.
// Block until we get a frame or something breaks.
if err := t.blockingRx(ctx, fws); err != nil {
return nil, err
}
medias = t.wrappersToModelMedias(fws)
}
}
// If outputPath is set, the caller wants big frames returned as files, not inline.
if t.outputPath != "" {
fileMedias := medias[:0] // reuses the underlying array of medias
for _, media := range medias {
fileMedia, err := mediaToFile(media, t.outputPath, t.outputFilePrefix)
if err != nil {
log.Info().Err(err).Str(lURL, t.source.url).Msg("failed to convert frame to file")
continue
}
fileMedias = append(fileMedias, fileMedia)
}
medias = fileMedias
}
return medias, nil
}
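// A consumer-side sketch (tap construction elided; handle() is hypothetical):
//
//	for {
//		medias, err := tap.GetModelMedias(ctx, false) // block until data arrives
//		if err != nil {
//			return err // ctx error or the source's disposition
//		}
//		for _, m := range medias {
//			handle(m)
//		}
//	}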

516
pkg/framer/frame_wrapper.go Normal file

@@ -0,0 +1,516 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package framer
import (
"errors"
"fmt"
"strconv"
"sync"
"github.com/asticode/go-astiav"
"google.golang.org/protobuf/types/known/durationpb"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)
const codecIDForUnknown = astiav.CodecIDNone
var mediaTypeToCodecID = map[astiav.MediaType]astiav.CodecID{
astiav.MediaTypeVideo: astiav.CodecIDMjpeg,
// TODO: We should use aac for audio, but that encoder buffers the input and
// may cause interleaving across frame wrappers. We'd need to switch to a single
// encoder context across all frame wrappers to fix this.
astiav.MediaTypeAudio: astiav.CodecIDWavpack,
astiav.MediaTypeData: codecIDForUnknown,
astiav.MediaTypeSubtitle: astiav.CodecIDText,
astiav.MediaTypeAttachment: codecIDForUnknown,
astiav.MediaTypeUnknown: codecIDForUnknown,
}
const mimeTypeForUnknown = "application/octet-stream"
var codecIDToMimeType = map[astiav.CodecID]string{
astiav.CodecIDMjpeg: "image/jpeg",
astiav.CodecIDTiff: "image/tiff",
astiav.CodecIDAac: "audio/aac",
astiav.CodecIDWavpack: "audio/wav",
astiav.CodecIDText: "text/plain",
astiav.CodecIDNone: mimeTypeForUnknown,
}
// mimeTypeToExtension isn't used here, but it's tightly coupled to the
// codecIDToMimeType map, so we define it here.
var mimeTypeToExtension = map[string]string{
"image/jpeg": "jpg",
"image/tiff": "tiff",
"audio/aac": "aac",
"audio/wav": "wav",
"text/plain": "txt",
}
var (
buffersrcFlags = astiav.NewBuffersrcFlags(astiav.BuffersrcFlagKeepRef)
buffersinkFlags = astiav.NewBuffersinkFlags()
)
type wrapperInvalidError struct{}
func (e *wrapperInvalidError) Error() string {
return "frameWrapper is no longer valid"
}
type filterFindError struct {
filter string
}
func (e *filterFindError) Error() string {
return fmt.Sprintf("could not find filter %q", e.filter)
}
// frameWrapper wraps an astiav.Frame and provides just-in-time
// re-encoding. The Frame field is exported to designate that it is
// set directly by the owner.
//
// Methods are thread-safe after Init(). The Frame may be directly
// written after calling SetValid(false). Then call SetValid(true) to reset
// the frame that will be encoded by ToModelMedia().
type frameWrapper struct {
// Frame is the frame to be encoded. It is exported so the owner can
// set the frame's data. While the frame is being written, it should
// be declared invalid by calling SetValid(false).
Frame *astiav.Frame
// Internals:
// The lock is used by the methods to protect the internals.
lock sync.Mutex
encCodecID astiav.CodecID
encCodec *astiav.Codec
encCodecContext *astiav.CodecContext
filterGraph *astiav.FilterGraph
buffersinkContext *astiav.FilterContext
buffersrcContext *astiav.FilterContext
filterFrame *astiav.Frame
encPkt *astiav.Packet
inputValid bool
modelMedia *model.Media
rawURL string
// Set at allocation:
mimeType string
streamIndex int
sourceEncoding string
}
// newFrameWrapper returns a new frameWrapper using the given mediaType.
// The returned frameWrapper is not ready for use until Init()'ed.
func newFrameWrapper(mediaType astiav.MediaType, sourceStreamIndex int,
sourceEncoding string,
) *frameWrapper {
encCodecID, ok := mediaTypeToCodecID[mediaType]
if !ok {
encCodecID = codecIDForUnknown
log.Info().Int(lIndex, sourceStreamIndex).Uint(lMediaType, uint(mediaType)).
Msg("no CodecID for MediaType")
}
encCodec := astiav.FindEncoder(encCodecID)
if encCodec == nil {
// This is ok, and just means we won't re-encode the data.
log.Debug().Int(lIndex, sourceStreamIndex).Uint(lCodec, uint(encCodecID)).
Msg("no encoder for CodecID")
}
fw := &frameWrapper{
Frame: astiav.AllocFrame(),
encCodecID: encCodecID,
encCodec: encCodec, // May be nil.
filterFrame: astiav.AllocFrame(),
encPkt: astiav.AllocPacket(),
// We restrict the range of encCodecID so this map lookup always succeeds:
mimeType: codecIDToMimeType[encCodecID],
streamIndex: sourceStreamIndex,
sourceEncoding: sourceEncoding,
}
return fw
}
// initFilter initializes the filter graph based on the given decCodecContext.
//
//nolint:funlen // Long but linear.
func (fw *frameWrapper) initFilter(decCodecContext *astiav.CodecContext) error {
var args astiav.FilterArgs
var buffersrc, buffersink *astiav.Filter
// content is the *actual filter* we're doing on the data.
// Almost everything else is just boilerplate.
var content string
switch decCodecContext.MediaType() {
case astiav.MediaTypeVideo:
args = astiav.FilterArgs{
"pix_fmt": strconv.Itoa(int(decCodecContext.PixelFormat())),
"pixel_aspect": decCodecContext.SampleAspectRatio().String(),
"time_base": decCodecContext.TimeBase().String(),
"video_size": strconv.Itoa(decCodecContext.Width()) + "x" + strconv.Itoa(decCodecContext.Height()),
}
buffersrc = astiav.FindFilterByName("buffer")
buffersink = astiav.FindFilterByName("buffersink")
content = fmt.Sprintf("format=pix_fmts=%s", fw.encCodecContext.PixelFormat().Name())
case astiav.MediaTypeAudio:
args = astiav.FilterArgs{
"channel_layout": decCodecContext.ChannelLayout().String(),
"sample_fmt": decCodecContext.SampleFormat().Name(),
"sample_rate": strconv.Itoa(decCodecContext.SampleRate()),
"time_base": decCodecContext.TimeBase().String(),
}
buffersrc = astiav.FindFilterByName("abuffer")
buffersink = astiav.FindFilterByName("abuffersink")
content = fmt.Sprintf("aformat=sample_fmts=%s:channel_layouts=%s",
fw.encCodecContext.SampleFormat().Name(), fw.encCodecContext.ChannelLayout().String())
default:
// No filtering needed.
return nil
}
if buffersrc == nil {
return &filterFindError{"buffersrc"}
}
if buffersink == nil {
return &filterFindError{"buffersink"}
}
// Create filter contexts
fw.filterGraph = astiav.AllocFilterGraph()
var err error
if fw.buffersrcContext, err = fw.filterGraph.NewFilterContext(buffersrc, "in", args); err != nil {
return fmt.Errorf("creating buffersrc context failed: %w", err)
}
if fw.buffersinkContext, err = fw.filterGraph.NewFilterContext(buffersink, "out", nil); err != nil {
return fmt.Errorf("creating buffersink context failed: %w", err)
}
// The Filter I/O's express the pad they want to connect to, so we tell
// the Outputs I/O that it's wired to the "in" pad of the buffersrc context
// and vice-versa.
inputs := astiav.AllocFilterInOut()
defer inputs.Free()
inputs.SetName("out")
inputs.SetFilterContext(fw.buffersinkContext)
inputs.SetPadIdx(0)
inputs.SetNext(nil)
outputs := astiav.AllocFilterInOut()
defer outputs.Free()
outputs.SetName("in")
outputs.SetFilterContext(fw.buffersrcContext)
outputs.SetPadIdx(0)
outputs.SetNext(nil)
// Parse
if err = fw.filterGraph.Parse(content, inputs, outputs); err != nil {
return fmt.Errorf("parsing filter failed: %w", err)
}
// Configure
if err = fw.filterGraph.Configure(); err != nil {
return fmt.Errorf("configuring filter failed: %w", err)
}
return nil
}
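// As a concrete sketch: for a yuv420p video stream feeding the MJPEG encoder,
// the parsed graph content is a single format filter such as
//
//	format=pix_fmts=yuvj420p
//
// and for audio it is an aformat filter such as
//
//	aformat=sample_fmts=s16p:channel_layouts=stereo
//
// The exact names depend on the encoder's first supported format, so these
// values are illustrative.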
// Init initializes the frameWrapper based on the given decCodecContext.
// It prepares the frameWrapper's encoder and filter graph for use.
func (fw *frameWrapper) Init(decCodecContext *astiav.CodecContext, rawURL string) error {
fw.rawURL = rawURL
mediaType := fw.encCodecID.MediaType()
switch mediaType {
case astiav.MediaTypeVideo:
fw.encCodecContext = astiav.AllocCodecContext(fw.encCodec)
if v := fw.encCodec.PixelFormats(); len(v) > 0 {
fw.encCodecContext.SetPixelFormat(v[0])
} else {
fw.encCodecContext.SetPixelFormat(decCodecContext.PixelFormat())
}
fw.encCodecContext.SetSampleAspectRatio(decCodecContext.SampleAspectRatio())
fw.encCodecContext.SetHeight(decCodecContext.Height())
fw.encCodecContext.SetWidth(decCodecContext.Width())
// We manually set quantization to require high quality from the encoder.
fw.encCodecContext.SetFlags(fw.encCodecContext.Flags().Add(astiav.CodecContextFlagQscale))
fw.encCodecContext.SetQmin(1)
case astiav.MediaTypeAudio:
fw.encCodecContext = astiav.AllocCodecContext(fw.encCodec)
if v := fw.encCodec.ChannelLayouts(); len(v) > 0 {
fw.encCodecContext.SetChannelLayout(v[0])
} else {
fw.encCodecContext.SetChannelLayout(decCodecContext.ChannelLayout())
}
if v := fw.encCodec.SampleFormats(); len(v) > 0 {
fw.encCodecContext.SetSampleFormat(v[0])
} else {
fw.encCodecContext.SetSampleFormat(decCodecContext.SampleFormat())
}
fw.encCodecContext.SetChannels(decCodecContext.Channels())
fw.encCodecContext.SetSampleRate(decCodecContext.SampleRate())
default:
// For other media types, we use no encoder or filter.
return nil
}
fw.encCodecContext.SetTimeBase(decCodecContext.TimeBase())
if decCodecContext.Flags().Has(astiav.CodecContextFlagGlobalHeader) {
fw.encCodecContext.SetFlags(fw.encCodecContext.Flags().Add(astiav.CodecContextFlagGlobalHeader))
}
if err := fw.encCodecContext.Open(fw.encCodec, nil); err != nil {
return fmt.Errorf("opening encoder context failed: %w", err)
}
return fw.initFilter(decCodecContext)
}
// Close frees the frameWrapper's resources. It must be called to prevent
// leaks of the underlying FFmpeg objects. It is only called at teardown,
// not between reuses.
func (fw *frameWrapper) Close() {
fw.lock.Lock()
defer fw.lock.Unlock()
fw.Frame.Free()
fw.inputValid = false
fw.modelMedia = nil
fw.encPkt.Free()
fw.filterFrame.Free()
// Freeing the FilterGraph frees the src and sink contexts.
if fw.filterGraph != nil {
fw.filterGraph.Free()
}
if fw.encCodecContext != nil {
fw.encCodecContext.Free()
}
}
func (fw *frameWrapper) StreamIndex() int {
return fw.streamIndex
}
// SetValid sets the validity of the embedded ffmpeg Frame. If invalid,
// the ToModelMedia() method will not attempt to access the Frame
// and will only return a model.Media if one had previously been encoded.
func (fw *frameWrapper) SetValid(valid bool) {
fw.lock.Lock()
defer fw.lock.Unlock()
fw.inputValid = valid
if valid {
// The old model.Media was still accessible before, even while the ffmpeg
// frame was being written. Now that the ffmpeg frame is valid, the first
// call of ToModelMedia() will generate a new model.Media based on the new
// ffmpeg frame.
fw.modelMedia = nil
}
}
func (fw *frameWrapper) PTS() int64 {
fw.lock.Lock()
pts := int64(-1)
if fw.inputValid {
pts = fw.Frame.Pts()
}
fw.lock.Unlock()
return pts
}
func (fw *frameWrapper) TimeBase() astiav.Rational {
return fw.encCodecContext.TimeBase()
}
// encodeMedia encodes f into a model.Media and returns it, or nil if encoding
// fails. Does not touch the fw.modelMedia.
func (fw *frameWrapper) encodeMedia(f *astiav.Frame) *model.Media {
if err := fw.encCodecContext.SendFrame(f); err != nil {
log.Debug().Int(lIndex, fw.streamIndex).Err(err).Msg("error sending to encoder")
return nil
}
fw.encPkt.Unref()
err := fw.encCodecContext.ReceivePacket(fw.encPkt)
if err != nil {
if errors.Is(err, astiav.ErrEagain) {
// Eagain happens when the encoder wants more data to make a packet, e.g., audio.
// However, incoming frames are muxed across multiple frame wrappers, each
// with its own encoding context.
log.Info().Int(lIndex, fw.streamIndex).Msg("warning: encoded frames may be interleaved")
} else {
log.Info().Int(lIndex, fw.streamIndex).Err(err).Msg("error receiving from encoder")
}
return nil
}
if fw.encPkt.Size() == 0 {
log.Info().Int(lIndex, fw.streamIndex).Msg("empty packet encoded from frame")
return nil
}
modelMedia := &model.Media{
Key: &model.MediaKey{
Url: fw.rawURL,
SliceSpec: &model.MediaSliceSpec{
StreamIndex: int32(fw.streamIndex),
},
},
Info: &model.MediaInfo{
Type: fw.mimeType,
Codec: fw.sourceEncoding,
PictureType: pictureTypeToPB[f.PictureType()],
IsKeyFrame: f.KeyFrame(),
},
Format: &model.Media_MediaBytes{
MediaBytes: &model.MediaBytes{
Data: fw.encPkt.Data(),
},
},
}
offset := ptsToDuration(f.Pts(), fw.encCodecContext.TimeBase())
if offset != noPTS {
modelMedia.Key.SliceSpec.Offset = durationpb.New(offset)
}
// Technically, the encoder can return multiple packets for a single frame
// being fed in. We don't support that, so we'll just drain any remainders.
for err == nil {
fw.encPkt.Unref()
err = fw.encCodecContext.ReceivePacket(fw.encPkt)
}
return modelMedia
}
// ToModelMedia encodes the frameWrapper's ffmpeg Frame into a model.Media.
// The first call to ToModelMedia will encode the frame, and subsequent
// calls will return the same model.Media until SetValid(true) is called.
// At that point, the next call to ToModelMedia will encode the new Frame.
// It can return nil if encoding fails or if the embedded Frame is declared
// invalid before this method is called at least once.
func (fw *frameWrapper) ToModelMedia() *model.Media {
fw.lock.Lock()
defer fw.lock.Unlock()
// If the frame was already turned into a model.Media, we can use it as-is.
// Note, this can still work even while the underlying ffmpeg frame
// is invalid and being written. Only once the Frame is declared valid will
// this cached modelMedia be reset.
if fw.modelMedia != nil {
return fw.modelMedia
}
// If the underlying ffmpeg frame has been invalidated, it's being reused,
// so we can no longer try to encode it.
// This generally shouldn't happen, but it could in some unlucky situations.
// (It probably means there's already a new frame in the tap.)
if !fw.inputValid {
return nil
}
// Else, we have an encoder.
// If there's no filter, we can just encode fw.Frame.
if fw.filterGraph == nil {
fw.modelMedia = fw.encodeMedia(fw.Frame)
return fw.modelMedia
}
// Else, we have a filter.
if err := fw.buffersrcContext.BuffersrcAddFrame(fw.Frame, buffersrcFlags); err != nil {
log.Info().Int(lIndex, fw.streamIndex).Err(err).Msg("buffersrc add frame error")
return nil
}
fw.filterFrame.Unref()
if err := fw.buffersinkContext.BuffersinkGetFrame(fw.filterFrame, buffersinkFlags); err != nil {
if !errors.Is(err, astiav.ErrEof) && !errors.Is(err, astiav.ErrEagain) {
log.Info().Int(lIndex, fw.streamIndex).Err(err).Msg("buffersink get frame error")
}
return nil
}
fw.filterFrame.SetPictureType(astiav.PictureTypeNone)
fw.modelMedia = fw.encodeMedia(fw.filterFrame)
// Technically, the buffersink can return multiple frames for a single frame
// being fed in. We don't support that, so we'll just drain any remainders.
var err error
for err == nil {
fw.filterFrame.Unref()
err = fw.buffersinkContext.BuffersinkGetFrame(fw.filterFrame, buffersinkFlags)
}
return fw.modelMedia
}
// Unwrap returns the underlying ffmpeg Frame by referencing its
// contents into the caller's frame.
func (fw *frameWrapper) Unwrap(frame *astiav.Frame) error {
fw.lock.Lock()
defer fw.lock.Unlock()
if !fw.inputValid {
return &wrapperInvalidError{}
}
frame.Unref()
_ = frame.Ref(fw.Frame)
return nil
}
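// Lifecycle sketch (a minimal sequence; the decode call is assumed and
// error handling is omitted):
//
//	fw := newFrameWrapper(astiav.MediaTypeVideo, index, "h264")
//	_ = fw.Init(decCodecContext, rawURL)
//	fw.SetValid(false)                         // about to overwrite fw.Frame
//	_ = decCodecContext.ReceiveFrame(fw.Frame) // owner writes the frame directly
//	fw.SetValid(true)                          // invalidates the cached model.Media
//	media := fw.ToModelMedia()                 // first call encodes; later calls hit the cache
//	fw.Close()                                 // teardown only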

208
pkg/framer/framer.go Normal file

@@ -0,0 +1,208 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// Package framer implements the Framer gRPC service.
// It can be invoked directly or by wrapping in a gRPC service endpoint.
package framer
import (
"context"
"sync"
"github.com/asticode/go-astiav"
"github.com/rs/zerolog"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/service"
)
const (
lCodec = "codecID"
lDecoder = "decoder"
lDecoderTaps = "decoderTaps"
lDecStreams = "decStreams"
lDemuxerTaps = "demuxerTaps"
lFile = "file"
lFFMpeg = "ffmpeg"
lFrameCount = "frameCount"
lFrameDropCount = "frameDropCount"
lFrameInterval = "frameInterval"
lFrameRateNum = "frameRateNum"
lFrameRateDen = "frameRateDen"
lIndex = "streamIndex"
lInFormatFlags = "inFormatFlags"
lInStreams = "inStreams"
lLive = "live"
lLoop = "loop"
lLossy = "lossy"
lMediaType = "mediaType"
lOutStreams = "outStreams"
lPacketCount = "packetCount"
lPacketDropCount = "packetDropCount"
lPTS = "pts"
lPTSInterval = "ptsInterval"
lPrime = "prime"
lRecode = "recode"
lRequested = "requested"
lSeekTime = "seekTime"
lSourceCount = "sourceCount"
lSplit = "split"
lStreamInfo = "streamInfo"
lStreamTime = "streamTime"
lTapStreams = "tapStreams"
lTimeBaseNum = "timeBaseNum"
lTimeBaseDen = "timeBaseDen"
lTimeInterval = "timeInterval"
lThrottle = "throttle"
lURL = "url"
lWatch = "watch"
)
//nolint:gochecknoglobals // allows logging from non-method funcs
var log zerolog.Logger
// wrapper represents a wrapped blob of ffmpeg data, either Frame or Packet.
type wrapper interface {
ToModelMedia() *model.Media // Returned media may not have media.Info set.
StreamIndex() int
PTS() int64
TimeBase() astiav.Rational
}
// Framer is the top-level implementation of the gRPC Framer service.
type Framer struct {
config *Config
// sourcesLock protects access to the sources lists.
sourcesLock sync.Mutex
sources map[*source]struct{}
reusableSources map[string]*source // Mapped by raw URL from the user, not normalized.
runSourceC chan *source
service.UnimplementedFramerServer
}
// New returns a new Framer instance.
func New(config *Config, logger *zerolog.Logger) *Framer {
log = logger.With().Str("pkg", "framer").Logger()
if config.LogLevel != ConfigDefault().LogLevel {
level, err := zerolog.ParseLevel(config.LogLevel)
if err != nil {
panic(err.Error())
}
log = log.Level(level)
}
ffmpegLoggerSetup(config)
return &Framer{
config: config,
sources: make(map[*source]struct{}),
reusableSources: make(map[string]*source),
runSourceC: make(chan *source, 1),
}
}
// Init initializes the Framer.
func (f *Framer) Init() error {
return nil
}
// getSource returns a reference-counted source. It might be an existing reusable
// source, or a newly initialized one. The caller is responsible
// for calling source.Unref() when finished with the source.
func (f *Framer) getSource(ctx context.Context, rawURL string,
framing *fps.SourceFileFraming, isLive bool, seek *service.FrameRequest_Seek,
) (*source, error) {
f.sourcesLock.Lock()
defer f.sourcesLock.Unlock()
// First we try to find an existing source and add a reference.
s, ok := f.reusableSources[rawURL]
if ok {
if err := s.Ref(); err == nil {
return s, nil
}
}
// Failing that, we allocate, initialize, and run a new source.
s = newSource()
if err := s.Init(rawURL, framing, isLive, seek, f.config.HwDecoderEnable); err != nil {
return nil, err
}
_ = s.Ref() // Can't fail, since we haven't run it yet.
select {
case f.runSourceC <- s:
case <-ctx.Done():
s.Unref()
return nil, ctx.Err()
}
f.sources[s] = struct{}{}
// We only reuse live-streamed sources; file sources are per-client.
if s.LiveStream {
f.reusableSources[s.url] = s
}
return s, nil
}
// runSource is meant to run async in a goroutine. It calls the source's
// Run() method and removes it from f.sources when it returns.
func (f *Framer) runSource(ctx context.Context, s *source) {
log.Info().Str(lURL, s.url).Msg("source starting")
err := s.Run(ctx)
// This source has exited, but it's possible a new one has already
// taken its place in the map from someone else calling StartFrames.
f.sourcesLock.Lock()
delete(f.sources, s)
if f.reusableSources[s.url] == s {
delete(f.reusableSources, s.url)
}
numSources := len(f.sources)
f.sourcesLock.Unlock()
log.Info().Err(err).Str(lURL, s.url).Int(lSourceCount, numSources).
Msg("source exited")
}
// Run is the main Framer loop.
// We defer the running of sources to this run loop just so they can have the
// long-lived ctx, not the short-lived gRPC service call ctx.
func (f *Framer) Run(ctx context.Context) error {
for {
select {
case s := <-f.runSourceC:
go f.runSource(ctx, s)
case <-ctx.Done():
// This context finishing should also exit any sources.
return nil
}
}
}
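// Wiring sketch (gRPC registration elided; Config construction assumed):
//
//	logger := zerolog.New(os.Stderr)
//	f := New(ConfigDefault(), &logger)
//	_ = f.Init()
//	go func() { _ = f.Run(context.Background()) }()
//	// service.RegisterFramerServer(grpcServer, f)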

134
pkg/framer/in_stream.go Normal file

@@ -0,0 +1,134 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package framer
import (
"container/ring"
"context"
"time"
"github.com/asticode/go-astiav"
"github.com/rs/zerolog"
)
// inStream handles wrapping packets from an input stream in packetWrappers.
type inStream struct {
// InFFmpegStream is exported so it can be referenced when setting up
// corresponding output streams.
InFFmpegStream *astiav.Stream
TimeBase astiav.Rational
// We use a ring buffer of packetWrappers to ingest data.
packetWrappers *ring.Ring
// startPacketTime lets us throttle the stream to the framerate of the input.
startPacketTime time.Time
startPts int64
PktCount int
}
// newInStream returns a new inStream instance. Must be closed with Close() when done.
func newInStream() *inStream {
// Three wrappers are enough for taps to process one frame while the stream
// distributes the next and invalidates the third for the next input.
// That appears sufficient to avoid missed frames in the worst case.
const numWrappers = 3
return &inStream{
packetWrappers: ring.New(numWrappers),
}
}
func (st *inStream) MarshalZerologObject(e *zerolog.Event) {
e.Str(lCodec, st.InFFmpegStream.CodecParameters().CodecID().Name()).
Int(lTimeBaseNum, st.TimeBase.Num()).
Int(lTimeBaseDen, st.TimeBase.Den()).
Int(lFrameRateNum, st.InFFmpegStream.AvgFrameRate().Num()).
Int(lFrameRateDen, st.InFFmpegStream.AvgFrameRate().Den()).
Int(lIndex, st.InFFmpegStream.Index())
}
// Close frees ffmpeg resources associated with the stream.
func (st *inStream) Close() {
st.packetWrappers.Do(func(v interface{}) {
if v != nil {
v.(*packetWrapper).Close() //nolint:forcetypeassert // It's a wrapper.
}
})
}
// Init initializes the stream.
func (st *inStream) Init(input *astiav.Stream, rawURL string) {
st.InFFmpegStream = input
st.TimeBase = input.TimeBase()
sourceEncoding := input.CodecParameters().CodecID().Name()
for i := 0; i < st.packetWrappers.Len(); i++ {
st.packetWrappers.Value = newPacketWrapper(input.Index(), sourceEncoding, st.TimeBase, rawURL)
st.packetWrappers = st.packetWrappers.Next()
}
}
func (st *inStream) ThrottleWait(ctx context.Context, pkt *astiav.Packet) error {
if st.startPacketTime.IsZero() {
st.startPacketTime = time.Now()
st.startPts = pkt.Pts()
return nil
}
nextFrameDeadline := ptsToDuration(pkt.Pts()-st.startPts, st.TimeBase)
now := time.Now()
elapsed := now.Sub(st.startPacketTime)
if elapsed < nextFrameDeadline {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(nextFrameDeadline - elapsed):
}
}
return nil
}
// nextPacketWrapper returns the next wrapper in this stream's ring buffer.
func (st *inStream) nextPacketWrapper() *packetWrapper {
st.packetWrappers = st.packetWrappers.Next()
return st.packetWrappers.Value.(*packetWrapper) //nolint:forcetypeassert // It's a wrapper.
}
func (st *inStream) WrapPacket(pkt *astiav.Packet) *packetWrapper {
// We're about to overwrite this wrapper's payload.
// If there's a tap still holding this wrapper, setting it invalid ensures
// that w.Unwrap or w.ToModelMedia() will not try to access the wrapped packet.
pw := st.nextPacketWrapper()
pw.SetValid(false)
// We're not decoding, so we just copy the packet into the packetWrapper.
pw.Packet.Unref()
_ = pw.Packet.Ref(pkt) // Only fails for memory.
// Now this pw has a valid packet and can be sent to taps.
pw.SetValid(true)
st.PktCount++
return pw
}
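// Demux-loop sketch showing how the ring is reused (the read loop and
// fan-out are assumed):
//
//	pkt := astiav.AllocPacket()
//	defer pkt.Free()
//	for formatContext.ReadFrame(pkt) == nil {
//		if st := inStreams[pkt.StreamIndex()]; st != nil {
//			pw := st.WrapPacket(pkt) // invalidates the oldest wrapper, then refs pkt
//			sendToTaps(pw)           // hypothetical fan-out
//		}
//		pkt.Unref()
//	}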


@@ -0,0 +1,187 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package framer
import (
"sync"
"github.com/asticode/go-astiav"
"google.golang.org/protobuf/types/known/durationpb"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)
// packetWrapper is a wrapper for astiav.Packet. It implements the wrapper
// interface, and can provide the packet data as a model.Media.
// It's different from a frameWrapper in that it does no encoding (e.g., jpeg).
// This is used for data where ffmpeg lacks an encoder/decoder and can
// only pass raw packets through to the taps.
type packetWrapper struct {
// Packet is the packet to be copied directly to the model.Media.
// It is exported so the owner can set the packet data directly.
Packet *astiav.Packet
// Set at allocation:
mimeType string
streamIndex int
sourceEncoding string
timeBase astiav.Rational
rawURL string
// Internals:
// The lock is used by the methods to protect the internals.
lock sync.Mutex
inputValid bool
modelMedia *model.Media
}
func newPacketWrapper(sourceStreamIndex int, sourceEncoding string,
timeBase astiav.Rational, rawURL string,
) *packetWrapper {
pw := &packetWrapper{
Packet: astiav.AllocPacket(),
// This is a raw packet with no encoder, so whatever the payload was,
// we just return it as an octet-stream mime type.
mimeType: mimeTypeForUnknown,
streamIndex: sourceStreamIndex,
sourceEncoding: sourceEncoding,
timeBase: timeBase,
rawURL: rawURL,
}
return pw
}
func (pw *packetWrapper) Close() {
pw.lock.Lock()
defer pw.lock.Unlock()
pw.Packet.Free()
pw.Packet = nil
}
func (pw *packetWrapper) StreamIndex() int {
return pw.streamIndex
}
// SetValid sets the validity of the embedded ffmpeg Packet. If invalid,
// the ToModelMedia() method will not attempt to access the Packet
// and will only return a model.Media if one had previously been encoded.
func (pw *packetWrapper) SetValid(valid bool) {
pw.lock.Lock()
defer pw.lock.Unlock()
pw.inputValid = valid
if valid {
// The old model.Media was still accessible before, even while the ffmpeg
// packet was being written. Now that the ffmpeg packet is valid, the first
// call of ToModelMedia() will generate a new model.Media based on the new
// ffmpeg packet.
pw.modelMedia = nil
}
}
func (pw *packetWrapper) PTS() int64 {
pw.lock.Lock()
pts := int64(-1)
if pw.inputValid {
pts = pw.Packet.Pts()
}
pw.lock.Unlock()
return pts
}
func (pw *packetWrapper) TimeBase() astiav.Rational {
return pw.timeBase
}
// ToModelMedia encodes the packetWrapper's ffmpeg Packet into a model.Media.
// The first call to ToModelMedia will encode the Packet, and subsequent
// calls will return the same model.Media until SetValid(true) is called.
// At that point, the next call to ToModelMedia will encode the new Packet.
// It can return nil if the embedded Packet is declared
// invalid before this method is called at least once.
func (pw *packetWrapper) ToModelMedia() *model.Media {
pw.lock.Lock()
defer pw.lock.Unlock()
// If the Packet was already turned into a model.Media, we can use it as-is.
// Note, this can still work even while the underlying ffmpeg Packet
// is invalid and being written. Only once the Packet is declared valid will
// this cached modelMedia be reset.
if pw.modelMedia != nil {
return pw.modelMedia
}
// If the underlying ffmpeg packet has been invalidated, it's being reused,
// so we can no longer try to encode it.
// This generally shouldn't happen, but it could in some unlucky situations.
// (It probably means there's already a new packet in the tap.)
if !pw.inputValid {
log.Info().Msg("bad timing: packet invalidated before encoding")
return nil
}
if pw.Packet == nil {
log.Info().Msg("missing packet: wrapper closed before encoding")
return nil
}
pw.modelMedia = &model.Media{
Key: &model.MediaKey{
Url: pw.rawURL,
SliceSpec: &model.MediaSliceSpec{
StreamIndex: int32(pw.streamIndex),
},
},
Info: &model.MediaInfo{
Type: pw.mimeType,
Codec: pw.sourceEncoding,
},
Format: &model.Media_MediaBytes{
MediaBytes: &model.MediaBytes{
Data: pw.Packet.Data(),
},
},
}
offset := ptsToDuration(pw.Packet.Pts(), pw.timeBase)
if offset != noPTS {
pw.modelMedia.Key.SliceSpec.Offset = durationpb.New(offset)
}
return pw.modelMedia
}
func (pw *packetWrapper) Unwrap(packet *astiav.Packet) error {
pw.lock.Lock()
defer pw.lock.Unlock()
if !pw.inputValid {
return &wrapperInvalidError{}
}
packet.Unref()
_ = packet.Ref(pw.Packet)
return nil
}

768
pkg/framer/source.go Normal file

@@ -0,0 +1,768 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package framer
import (
"context"
"errors"
"fmt"
"io"
"net/url"
"path/filepath"
"strings"
"sync"
"time"
"github.com/asticode/go-astiav"
"github.com/rs/zerolog"
"golang.org/x/exp/slices"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/service"
"github.com/TurbineOne/ffmpeg-framer/pkg/mimer"
)
type noStreamsError struct {
url string
}
func (e *noStreamsError) Error() string {
return fmt.Sprintf("no streams found in %q", e.url)
}
type sourceExitedError struct{}
func (e *sourceExitedError) Error() string {
return "failed to create reference; source already exited"
}
type seekNotSupportedError struct {
url string
}
func (e *seekNotSupportedError) Error() string {
return fmt.Sprintf("seek not supported for %q", e.url)
}
// tap represents a tap on a source. Taps are used to either consume individual
// frames from the source, or to monitor a recording of the source.
type tap interface {
Send(context.Context, wrapper, bool)
DrainUntilClosed()
Close(error) // The Close err is the disposition of the source, i.e., why it closed.
}
// source represents a source of frames, either a network stream or a file.
type source struct {
// Set at Init():
url string
scheme string
outFilePrefix string
fileThrottle fps.SourceFileFraming_FileThrottle
seekTime time.Duration
startAtKeyframe bool
LiveStream bool // a source that cannot be throttled and should be reused by taps
hwDecoderEnable bool
// Reference counting lets us share a source among multiple clients.
refLock sync.Mutex
refCount int
refDisabled bool
cancel context.CancelFunc
// State shared with the taps:
tapsLock sync.Mutex
tapsDisabled bool
demuxerTaps []tap // Taps that want raw demuxed packets before any decoding (iow, record clients)
decoderTaps []tap // Taps that want decoded frames, if available (iow, frame clients)
tapAddedC chan struct{}
// Internal state:
// The MediaInfo and streams are not available until we have a chance to read
// the source, so the writer lock is held at newSource() until setup completes.
// This MediaInfo includes streams and is returned to callers of Start().
// It refers to the source media we're asked to frame, not the individual frames.
// So this mediaInfo is also returned as the ContainerMediaInfo for individual frames.
mediaInfoLock sync.RWMutex
setupErr error // If we fail to get through setup, the error is preserved here.
mediaInfo *model.MediaInfo
lossySend bool // taps drop packets if not consumed fast enough
isContainer bool // e.g., mp4, as opposed to image formats like gif or jpg.
inStreams map[int]*inStream
decStreams map[int]*decStream
// FFmpeg state:
inputFormatContext *astiav.FormatContext
}
// newSource creates a new source.
func newSource() *source {
s := &source{
mediaInfo: &model.MediaInfo{
Streams: make(map[int32]*model.StreamInfo),
},
tapAddedC: make(chan struct{}, 1),
inStreams: make(map[int]*inStream),
decStreams: make(map[int]*decStream),
}
s.mediaInfoLock.Lock() // unlocked once the source is started
return s
}
// Init initializes the source. It must be called before running the source.
func (s *source) Init(rawURL string, framing *fps.SourceFileFraming,
isLive bool, seek *service.FrameRequest_Seek, hwDecoderEnable bool,
) error {
s.url = rawURL
s.LiveStream = isLive
s.fileThrottle = framing.GetThrottle()
s.seekTime = seek.GetTime().AsDuration()
s.startAtKeyframe = seek.GetClosestKeyframe()
s.hwDecoderEnable = hwDecoderEnable
if parsedURL, err := url.Parse(s.url); err != nil {
if !strings.HasPrefix(s.url, "/") {
return fmt.Errorf("parsing url failed: %w", err)
}
// not all file names can be parsed as URLs because of special characters
s.scheme = ""
} else {
s.scheme = parsedURL.Scheme
}
// If this source's frames are requested as files, this is the filename prefix.
s.outFilePrefix = strings.Trim(s.url, string(filepath.Separator))
for _, char := range []string{":", "?", "*", "\"", "<", ">", "|", "\\", "/"} {
s.outFilePrefix = strings.ReplaceAll(s.outFilePrefix, char, "_")
}
log.Info().Str(lURL, s.url).Str(lThrottle, s.fileThrottle.String()).
Bool(lLive, s.LiveStream).Dur(lSeekTime, s.seekTime).Msg("source init")
return nil
}
// Ref adds a reference to s. If s has already exited, returns an error.
func (s *source) Ref() error {
s.refLock.Lock()
defer s.refLock.Unlock()
if s.refDisabled {
return &sourceExitedError{}
}
s.refCount++
return nil
}
// Unref removes a reference from s. If s has no more references, cancels its context.
func (s *source) Unref() {
s.refLock.Lock()
defer s.refLock.Unlock()
s.refCount--
if s.refCount == 0 {
s.refDisabled = true
if s.cancel != nil {
s.cancel()
}
}
}
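// Reference-counting sketch, mirroring Framer.getSource():
//
//	s := newSource()
//	if err := s.Init(rawURL, framing, isLive, seek, hwDecode); err != nil { ... }
//	_ = s.Ref()     // can't fail before Run()
//	defer s.Unref() // the last Unref cancels the source's context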
// ffmpegStreamToStreamInfo converts an FFmpeg stream to a StreamInfo.
func ffmpegStreamToStreamInfo(ffmpegStream *astiav.Stream) *model.StreamInfo {
streamInfo := &model.StreamInfo{
Index: int32(ffmpegStream.Index()),
// TODO(casey): This version of astiav doesn't expose IANA media types
// for streams, so we're just passing the MediaType string,
// e.g., "video". We could try to handle this better.
Type: ffmpegStream.CodecParameters().MediaType().String(),
Codec: ffmpegStream.CodecParameters().CodecID().Name(),
Duration: ptsToDurationPB(ffmpegStream.Duration(), ffmpegStream.TimeBase()),
StartOffset: ptsToDurationPB(ffmpegStream.StartTime(), ffmpegStream.TimeBase()),
AvgFrameRate: rationalToPB(ffmpegStream.AvgFrameRate()),
RealBaseFrameRate: rationalToPB(ffmpegStream.RFrameRate()),
TimeBase: rationalToPB(ffmpegStream.TimeBase()),
}
switch ffmpegStream.CodecParameters().MediaType() {
case astiav.MediaTypeVideo:
streamInfo.Stream = &model.StreamInfo_Video{Video: &model.VideoStreamInfo{
FrameCount: ffmpegStream.NbFrames(),
Width: int32(ffmpegStream.CodecParameters().Width()),
Height: int32(ffmpegStream.CodecParameters().Height()),
Fps: ffmpegStream.RFrameRate().ToDouble(),
}}
case astiav.MediaTypeAudio:
streamInfo.Stream = &model.StreamInfo_Audio{Audio: &model.AudioStreamInfo{
SampleCount: ffmpegStream.NbFrames(),
SamplesPerSecond: float64(ffmpegStream.CodecParameters().SampleRate()),
Channels: int32(ffmpegStream.CodecParameters().Channels()),
}}
case astiav.MediaTypeSubtitle:
streamInfo.Stream = &model.StreamInfo_Subtitle{Subtitle: &model.SubtitleStreamInfo{}}
case astiav.MediaTypeData:
streamInfo.Stream = &model.StreamInfo_Data{Data: &model.DataStreamInfo{}}
case astiav.MediaTypeAttachment:
streamInfo.Stream = &model.StreamInfo_Attachment{Attachment: &model.AttachmentStreamInfo{}}
case astiav.MediaTypeUnknown:
fallthrough
default:
streamInfo.Stream = &model.StreamInfo_Unknown{Unknown: &model.UnknownStreamInfo{}}
}
if metaDict := ffmpegStream.Metadata(); metaDict != nil {
streamInfo.Metadata = dictToMap(metaDict)
}
return streamInfo
}
// resetStreams resets all streams and stream info.
func (s *source) resetStreams(ffmpegInStreams []*astiav.Stream) map[int32]*model.StreamInfo {
for _, inStream := range s.inStreams {
inStream.Close()
}
for _, decStream := range s.decStreams {
decStream.Close()
}
s.inStreams = make(map[int]*inStream)
s.decStreams = make(map[int]*decStream)
modelStreamInfo := make(map[int32]*model.StreamInfo, len(ffmpegInStreams))
for _, ffmpegStream := range ffmpegInStreams {
index := ffmpegStream.Index()
// We create a map of stream info to return to callers of Start().
modelStreamInfo[int32(index)] = ffmpegStreamToStreamInfo(ffmpegStream)
s.inStreams[index] = newInStream()
s.inStreams[index].Init(ffmpegStream, s.url)
s.decStreams[index] = newDecStream()
s.decStreams[index].Init(s.inputFormatContext, ffmpegStream, s.url, s.hwDecoderEnable)
}
// Set the start time for all streams.
if s.seekTime > 0 && !s.LiveStream {
// convert seconds to timebase
streamTime := int64(s.seekTime.Seconds() * float64(astiav.TimeBase))
flags := astiav.NewSeekFlags(astiav.SeekFlagBackward, astiav.SeekFlagFrame)
log.Debug().Dur(lSeekTime, s.seekTime).Int64(lStreamTime, streamTime).
Msg("seeking")
err := s.inputFormatContext.SeekFrame(-1, streamTime, flags)
if err != nil {
log.Info().Dur(lSeekTime, s.seekTime).Int64(lStreamTime, streamTime).
Err(err).Msg("error seeking")
}
}
// If we have a simple image file that does not require streaming,
// we return media info with isContainer=false and disable streaming.
// Callers should just grab the image directly.
//
// NOTE(casey): I've looked at a bunch of different ways of detecting this, but
// none of the fields you'd want to use are consistent, and I'm not really
// sure what we want to do for animated gifs and jpegs.
// - media_type is video(0) for all image formats, regardless
// - nb_frames is zero on some video streams that have many frames
// - duration is often 1 for images, but sometimes not, even for non-animated gifs.
// So I opted for just checking specific codecs, and we can expand this list
// or refine the logic as needed.
s.isContainer = true
if !s.LiveStream && len(ffmpegInStreams) == 1 {
codecID := ffmpegInStreams[0].CodecParameters().CodecID()
switch codecID {
case astiav.CodecIDAnsi,
astiav.CodecIDBmp,
astiav.CodecIDGif,
astiav.CodecIDJpeg2000,
astiav.CodecIDJpegls,
astiav.CodecIDMjpeg,
astiav.CodecIDPam,
astiav.CodecIDPbm,
astiav.CodecIDPgm,
astiav.CodecIDPgmyuv,
astiav.CodecIDPng,
astiav.CodecIDPpm,
astiav.CodecIDTiff,
astiav.CodecIDWebp:
log.Info().Str(lURL, s.url).Str(lCodec, codecID.String()).Msg("non-container image format")
s.isContainer = false
}
}
return modelStreamInfo
}
// setup is internal and called by the source's main run loop.
func (s *source) setup() error {
// Regardless of how we exit, we need to allow callers to read whatever
// stream metadata we manage to collect.
defer s.mediaInfoLock.Unlock()
s.inputFormatContext = astiav.AllocFormatContext()
optsDict := astiav.NewDictionary()
defer optsDict.Free()
// This flag should only be needed for hls streams, but it doesn't hurt to set it.
// Start at the last segment (most recent) for live streams.
if err := optsDict.Set("live_start_index", "-1", astiav.DictionaryFlags(0)); err != nil {
s.setupErr = fmt.Errorf("setting live_start_index failed: %w", err)
return s.setupErr
}
if s.LiveStream {
if err := optsDict.Set("use_wallclock_as_timestamps", "1", astiav.DictionaryFlags(0)); err != nil {
s.setupErr = fmt.Errorf("setting use_wallclock_as_timestamps failed: %w", err)
return s.setupErr
}
}
if err := s.openInput(nil, optsDict); err != nil {
s.setupErr = err
return s.setupErr
}
// We let the taps drop frames if it's a live stream, or if lock-step throttle is disabled.
s.lossySend = s.LiveStream || (s.fileThrottle != fps.SourceFileFraming_FILE_THROTTLE_LOCK_STEP)
ffmpegInStreams := s.inputFormatContext.Streams()
if len(ffmpegInStreams) == 0 {
s.setupErr = &noStreamsError{s.url}
return s.setupErr
}
streams := s.resetStreams(ffmpegInStreams)
// Technically, avlib can handle this, but the API to get this info is not
// exposed by the current version of astiav, so we do it here.
mimeType := mimer.UnknownMediaType
if s.scheme == "" {
mimeType = mimer.GetContentType(s.url)
}
s.mediaInfo = &model.MediaInfo{
Type: mimeType,
Duration: ptsToDurationPB(s.inputFormatContext.Duration(), astiav.TimeBaseQ),
Streams: streams,
IsContainer: s.isContainer,
IsSeekable: !s.LiveStream,
}
logInStreams := zerolog.Arr()
for _, st := range s.inStreams {
logInStreams.Object(st)
}
logDecStreams := zerolog.Arr()
for _, st := range s.decStreams {
logDecStreams.Object(st)
}
log.Info().Str(lURL, s.url).Bool(lLossy, s.lossySend).
Str(lInFormatFlags, ioFormatFlagsToString(s.inputFormatContext.InputFormat().Flags())).
Array(lInStreams, logInStreams).Array(lDecStreams, logDecStreams).
Msg("source setup")
return nil
}
// sendDemuxerTaps sends w to each demuxer tap.
func (s *source) sendDemuxerTaps(ctx context.Context, w wrapper) {
// We have to hold the lock for the entire duration of this function.
// This prevents a tap from being asynchronously Stop()'d and its channels
// closed while we're writing to them.
s.tapsLock.Lock()
defer s.tapsLock.Unlock()
for _, t := range s.demuxerTaps {
t.Send(ctx, w, s.lossySend)
}
}
// sendDecoderTaps sends w to each decoder tap.
func (s *source) sendDecoderTaps(ctx context.Context, w wrapper) {
// Demuxing starts at a keyframe, so we can't send frames until we've
// demuxed to the start time.
if !s.LiveStream && !s.startAtKeyframe && s.seekTime > 0 && ptsToDuration(w.PTS(), w.TimeBase()) < s.seekTime {
log.Trace().Int(lIndex, w.StreamIndex()).Dur(lSeekTime, s.seekTime).
Dur(lPTS, ptsToDuration(w.PTS(), w.TimeBase())).Msg("skipping frame prior to startTime")
return
}
// We have to hold the lock for the entire duration of this function.
// This prevents a tap from being asynchronously Stop()'d and its channels
// closed while we're writing to them.
s.tapsLock.Lock()
defer s.tapsLock.Unlock()
for _, t := range s.decoderTaps {
t.Send(ctx, w, s.lossySend)
}
}
func (s *source) SetupErr() error {
s.mediaInfoLock.RLock()
defer s.mediaInfoLock.RUnlock()
return s.setupErr
}
func (s *source) MediaInfo() *model.MediaInfo {
s.mediaInfoLock.RLock()
defer s.mediaInfoLock.RUnlock()
return s.mediaInfo
}
// InStreams returns the map of input streams. This accessor is just here so
// a destination can align its output streams with the source's input streams.
func (s *source) InStreams() map[int]*inStream {
s.mediaInfoLock.RLock()
defer s.mediaInfoLock.RUnlock()
return s.inStreams
}
// DecStreams returns the map of decoder streams. This accessor is just here so
// a frame tap can align its streams with ours.
func (s *source) DecStreams() map[int]*decStream {
s.mediaInfoLock.RLock()
defer s.mediaInfoLock.RUnlock()
return s.decStreams
}
type tapLayer int
const (
tapLayerDemuxer tapLayer = iota
tapLayerDecoder
)
// AddTap adds a tap to 's'.
func (s *source) AddTap(t tap, layer tapLayer) error {
s.tapsLock.Lock()
defer s.tapsLock.Unlock()
if s.tapsDisabled {
return &sourceExitedError{}
}
switch layer {
case tapLayerDemuxer:
s.demuxerTaps = append(s.demuxerTaps, t)
case tapLayerDecoder:
s.decoderTaps = append(s.decoderTaps, t)
}
// Only the first tap is critical, so we don't block on it.
select {
case s.tapAddedC <- struct{}{}:
default:
}
return nil
}
// RemoveTap removes a tap from 's'. This is an interface call for the consumer,
// symmetrical to AddTap(). The source itself should not use this call, as it
// drains the tap before closing, which could steal desirable frames from the
// consumer.
func (s *source) RemoveTap(t tap) {
t.DrainUntilClosed()
s.tapsLock.Lock()
defer s.tapsLock.Unlock()
t.Close(nil)
if i := slices.Index(s.decoderTaps, t); i >= 0 {
s.decoderTaps = slices.Delete(s.decoderTaps, i, i+1)
return
}
if i := slices.Index(s.demuxerTaps, t); i >= 0 {
s.demuxerTaps = slices.Delete(s.demuxerTaps, i, i+1)
}
}
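// Tap lifecycle sketch from a consumer's point of view (tap construction
// elided; frameTap is one implementation of the tap interface):
//
//	if err := s.AddTap(t, tapLayerDecoder); err != nil {
//		return err // source already exited
//	}
//	// ... consume frames via the tap ...
//	s.RemoveTap(t) // drains, then closes under the taps lock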
// exit cleans up the source, closing all taps and freeing ffmpeg resources.
func (s *source) exit(err error) {
s.refLock.Lock()
s.cancel() // Only affects the source Run loop.
s.refDisabled = true
s.refLock.Unlock()
s.tapsLock.Lock()
s.tapsDisabled = true
decoderTaps := make([]tap, len(s.decoderTaps))
copy(decoderTaps, s.decoderTaps)
s.decoderTaps = nil
demuxerTaps := make([]tap, len(s.demuxerTaps))
copy(demuxerTaps, s.demuxerTaps)
s.demuxerTaps = nil
s.tapsLock.Unlock()
for _, t := range decoderTaps {
t.Close(err)
}
for _, t := range demuxerTaps {
t.Close(err)
}
pktCounts := make([]int, len(s.inStreams))
for i, st := range s.inStreams {
pktCounts[i] = st.PktCount
st.Close()
}
frameCounts := make([]int, len(s.decStreams))
for i, st := range s.decStreams {
frameCounts[i] = st.FrameCount
st.Close()
}
log.Debug().Ints(lFrameCount, frameCounts).Ints(lPacketCount, pktCounts).
Int(lDecoderTaps, len(decoderTaps)).Int(lDemuxerTaps, len(demuxerTaps)).
Str(lURL, s.url).Msg("source exiting, taps closed")
s.inputFormatContext.CloseInput()
s.inputFormatContext.Free()
}
// flushDecoderToTaps flushes the decoder and sends any remaining frames to taps.
func (s *source) flushDecoderToTaps(ctx context.Context) {
pendingTaps := false
for _, decStream := range s.decStreams {
if decStream == nil {
continue
}
wrappers, err := decStream.DecodeFlush()
if err != nil {
continue
}
for _, w := range wrappers {
s.sendDecoderTaps(ctx, w)
pendingTaps = true
}
}
// Give the taps a chance to drain before we exit.
if pendingTaps {
time.Sleep(1 * time.Second)
}
}
// readAndDecode reads a packet from the source, decodes it if necessary, and
// distributes it to both packet and decoder taps. This is a blocking call.
func (s *source) readAndDecode(ctx context.Context, pkt *astiav.Packet) error {
s.tapsLock.Lock()
shouldDecode := len(s.decoderTaps) > 0
s.tapsLock.Unlock()
// If this source is a non-container file (e.g., gif, jpeg) we bail.
if !s.isContainer {
return io.EOF
}
pkt.Unref()
// This read is blocking:
if err := s.inputFormatContext.ReadFrame(pkt); err != nil {
if errors.Is(err, astiav.ErrEof) {
s.flushDecoderToTaps(ctx)
err = io.EOF
}
return fmt.Errorf("source: input read failed: %w", err)
}
index := pkt.StreamIndex()
// If we don't have a matching stream object, we skip this packet.
inStream := s.inStreams[index]
if inStream == nil {
return nil
}
// If this is a file and we're throttling to the presentation rate, we
// wait until the PTS. This is obviously blocking.
if !s.LiveStream && s.fileThrottle == fps.SourceFileFraming_FILE_THROTTLE_PRESENTATION_RATE {
if err := inStream.ThrottleWait(ctx, pkt); err != nil {
return nil //nolint:nilerr // Likely a context cancellation, let caller figure it out.
}
}
pw := inStream.WrapPacket(pkt)
s.sendDemuxerTaps(ctx, pw) // This can also block, but probably not for long.
if !shouldDecode {
return nil
}
decStream := s.decStreams[index]
if decStream == nil {
return nil
}
wrappers, err := decStream.Decode(pw)
if err != nil {
log.Info().Err(err).Str(lURL, s.url).Int(lIndex, index).
Msg("decode failed, dropping packet")
return nil
}
for _, w := range wrappers {
s.sendDecoderTaps(ctx, w)
}
return nil
}
// openInput opens the inputFormatContext for the source.
func (s *source) openInput(format *astiav.InputFormat, d *astiav.Dictionary) error {
if err := s.inputFormatContext.OpenInput(s.url, format, d); err != nil {
return fmt.Errorf("opening input failed: %w", err)
}
if err := s.inputFormatContext.FindStreamInfo(nil); err != nil {
s.inputFormatContext.CloseInput()
return fmt.Errorf("finding stream info failed: %w", err)
}
return nil
}
// reopenInput closes and reopens the inputFormatContext at the specified start time.
func (s *source) reopenInput(seekTime time.Duration, startAtKeyframe bool) error {
s.inputFormatContext.CloseInput()
log.Info().Str(lURL, s.url).Msg("reopening input file")
optsDict := astiav.NewDictionary()
defer optsDict.Free()
// This flag should only be needed for hls streams, but it doesn't hurt to set it.
// Start at the last segment (most recent) for live streams.
if err := optsDict.Set("live_start_index", "-1", astiav.DictionaryFlags(0)); err != nil {
return fmt.Errorf("setting live_start_index failed: %w", err)
}
if err := s.openInput(nil, optsDict); err != nil {
return err
}
ffmpegStreams := s.inputFormatContext.Streams()
s.mediaInfoLock.Lock()
s.seekTime = seekTime
s.startAtKeyframe = startAtKeyframe
s.resetStreams(ffmpegStreams)
s.mediaInfoLock.Unlock()
return nil
}
// Seek seeks to the given timestamp in the input stream.
// At the moment this is only supported for single-file, non-live streams,
// and even then only if Seek is called before the first frame is read.
// TODO: Support more scenarios. This will require a rethink on
// mutexes and how we handle the inputFormatContext.
func (s *source) Seek(seekTime time.Duration, startAtKeyframe bool) error {
if s.LiveStream {
return &seekNotSupportedError{url: s.url}
}
return s.reopenInput(seekTime, startAtKeyframe)
}
// Run manages the source, reading packets from the input URL and distributing
// frames to taps.
func (s *source) Run(ctx context.Context) error {
s.refLock.Lock()
if s.refDisabled {
s.refLock.Unlock()
return nil
}
ctx, s.cancel = context.WithCancel(ctx)
s.refLock.Unlock()
var err error
defer func() {
s.exit(err)
}()
if err = s.setup(); err != nil {
return err
}
// If this isn't live, we wait for the first tap to be added before reading.
if !s.LiveStream {
select {
case <-s.tapAddedC:
case <-ctx.Done():
err = ctx.Err()
return err
}
}
pkt := astiav.AllocPacket()
defer pkt.Free()
for {
// This loop blocks mostly on I/O so we can only poll for ctx cancellation.
if ctx.Err() != nil {
err = ctx.Err()
return err
}
if err = s.readAndDecode(ctx, pkt); err != nil {
return err
}
}
}

148
pkg/framer/util.go Normal file

@@ -0,0 +1,148 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package framer
import (
"math"
"math/big"
"strings"
"time"
"github.com/asticode/go-astiav"
"google.golang.org/protobuf/types/known/durationpb"
"github.com/TurbineOne/ffmpeg-framer/api/proto/gen/go/fps/model"
)
const noPTS = time.Duration(math.MinInt64)
var pictureTypeToPB = map[astiav.PictureType]model.MediaInfo_PictureType{
astiav.PictureTypeI: model.MediaInfo_PICTURE_TYPE_I,
astiav.PictureTypeP: model.MediaInfo_PICTURE_TYPE_P,
astiav.PictureTypeB: model.MediaInfo_PICTURE_TYPE_B,
astiav.PictureTypeS: model.MediaInfo_PICTURE_TYPE_S,
astiav.PictureTypeSi: model.MediaInfo_PICTURE_TYPE_SI,
astiav.PictureTypeSp: model.MediaInfo_PICTURE_TYPE_SP,
astiav.PictureTypeBi: model.MediaInfo_PICTURE_TYPE_BI,
}
var ioFormatFlagStrings = map[astiav.IOFormatFlag]string{
astiav.IOFormatFlagNofile: "IOFormatFlagNofile",
astiav.IOFormatFlagNeednumber: "IOFormatFlagNeednumber",
astiav.IOFormatFlagShowIds: "IOFormatFlagShowIds",
astiav.IOFormatFlagGlobalheader: "IOFormatFlagGlobalheader",
astiav.IOFormatFlagNotimestamps: "IOFormatFlagNotimestamps",
astiav.IOFormatFlagGenericIndex: "IOFormatFlagGenericIndex",
astiav.IOFormatFlagTsDiscont: "IOFormatFlagTsDiscont",
astiav.IOFormatFlagVariableFps: "IOFormatFlagVariableFps",
astiav.IOFormatFlagNodimensions: "IOFormatFlagNodimensions",
astiav.IOFormatFlagNostreams: "IOFormatFlagNostreams",
astiav.IOFormatFlagNobinsearch: "IOFormatFlagNobinsearch",
astiav.IOFormatFlagNogensearch: "IOFormatFlagNogensearch",
astiav.IOFormatFlagNoByteSeek: "IOFormatFlagNoByteSeek",
astiav.IOFormatFlagAllowFlush: "IOFormatFlagAllowFlush",
astiav.IOFormatFlagTsNonstrict: "IOFormatFlagTsNonstrict",
astiav.IOFormatFlagTsNegative: "IOFormatFlagTsNegative",
astiav.IOFormatFlagSeekToPts: "IOFormatFlagSeekToPts",
}
// ioFormatFlagsToString returns a string representation of astiav.IOFormatFlags.
func ioFormatFlagsToString(flags astiav.IOFormatFlags) string {
var setFlags []string
for bit, name := range ioFormatFlagStrings {
if flags&astiav.IOFormatFlags(bit) != 0 {
setFlags = append(setFlags, name)
}
}
if len(setFlags) == 0 {
return ""
}
return strings.Join(setFlags, " | ")
}
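Because Go randomizes map iteration order, the flag names in the joined string can appear in any order from call to call; the result is intended for logging only. A quick sketch of usage with two flags set:

    flags := astiav.IOFormatFlags(astiav.IOFormatFlagNofile | astiav.IOFormatFlagNotimestamps)
    s := ioFormatFlagsToString(flags)
    // s == "IOFormatFlagNofile | IOFormatFlagNotimestamps" (or the reverse order)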
// dictToMap converts an astiav.Dictionary to a map[string]string.
func dictToMap(d *astiav.Dictionary) map[string]string {
result := make(map[string]string)
var entry *astiav.DictionaryEntry
for {
entry = d.Get("", entry, astiav.DictionaryFlags(astiav.DictionaryFlagIgnoreSuffix))
if entry == nil {
break
}
result[entry.Key()] = entry.Value()
}
return result
}
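For example, snapshotting an options dictionary before handing it to FFmpeg (a sketch; NewDictionary, Set, and Free are the usual go-astiav calls, but treat the exact signatures as assumptions):

    d := astiav.NewDictionary()
    defer d.Free()
    _ = d.Set("rtsp_transport", "tcp", astiav.DictionaryFlags(0))
    m := dictToMap(d) // map[string]string{"rtsp_transport": "tcp"}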
// ptsToDuration converts pts to a time.Duration.
func ptsToDuration(pts int64, timeBase astiav.Rational) time.Duration {
if pts == astiav.NoPtsValue {
return noPTS
}
if timeBase.Den() == 0 {
return 0
}
// big.Int avoids overflowing int64 when multiplying pts by time.Second.
ptsBig := big.NewInt(pts)
secBig := big.NewInt(int64(time.Second))
tbNumBig := big.NewInt(int64(timeBase.Num()))
tbDenBig := big.NewInt(int64(timeBase.Den()))
durBig := new(big.Int).Mul(ptsBig, secBig)
durBig.Mul(durBig, tbNumBig).Div(durBig, tbDenBig)
return time.Duration(durBig.Int64())
}
// durationToPts converts a time.Duration to pts.
func durationToPts(duration time.Duration, timeBase astiav.Rational) int64 {
if timeBase.Num() == 0 {
return 0
}
// Using float math here in order to round to the nearest integer,
// e.g. a computed pts of 23.99 should round to 24, not truncate to 23.
// Note that the +0.5 trick assumes a non-negative duration.
return int64(duration.Seconds()*float64(timeBase.Den())/
float64(timeBase.Num()) + 0.5)
}
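As a worked example with the common MPEG-TS time base of 1/90000 (NewRational is the usual go-astiav constructor; an assumption of this sketch):

    tb := astiav.NewRational(1, 90000)
    _ = ptsToDuration(90000, tb)               // 90000 ticks at 1/90000 = exactly 1s
    _ = durationToPts(40*time.Millisecond, tb) // 0.040 * 90000 = 3600 ticks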
// ptsToDurationPB converts pts to a durationpb.Duration given a timebase.
func ptsToDurationPB(pts int64, timeBase astiav.Rational) *durationpb.Duration {
dur := ptsToDuration(pts, timeBase)
if dur == noPTS {
return nil
}
return durationpb.New(dur)
}
// rationalToPB converts an astiav.Rational to a model.Rational.
func rationalToPB(r astiav.Rational) *model.Rational {
return &model.Rational{
Num: int64(r.Num()),
Den: int64(r.Den()),
}
}

42
pkg/interrupt/interrupt.go Normal file
View File

@@ -0,0 +1,42 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// Package interrupt handles interrupt signals from the OS.
package interrupt
import (
"context"
"errors"
"os"
"os/signal"
"syscall"
)
// ErrInterrupt indicates that an interrupt was received from the OS.
var ErrInterrupt = errors.New("interrupt received")
// Run runs until ctx is done or an interrupt is received.
func Run(ctx context.Context) error {
signalChan := make(chan os.Signal, 1)
signal.Notify(signalChan, syscall.SIGTERM, syscall.SIGINT)
select {
case <-signalChan: // First signal: return ErrInterrupt so the caller can cancel.
return ErrInterrupt
case <-ctx.Done():
return nil
}
}
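A typical pattern is to run this alongside the real workload in an errgroup (golang.org/x/sync/errgroup), which cancels the shared context as soon as the first goroutine returns (serve is a hypothetical stand-in for the actual workload):

    g, ctx := errgroup.WithContext(context.Background())
    g.Go(func() error { return interrupt.Run(ctx) })
    g.Go(func() error { return serve(ctx) }) // hypothetical workload
    if err := g.Wait(); errors.Is(err, interrupt.ErrInterrupt) {
        // a signal triggered the shutdown; exit cleanly
    }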

77
pkg/logger/logger.go Normal file
View File

@@ -0,0 +1,77 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package logger
import (
"io"
"os"
"time"
"github.com/mattn/go-isatty"
"github.com/rs/zerolog"
)
func init() {
// Users of our logging will always adhere to these global settings:
zerolog.TimeFieldFormat = time.RFC3339Nano
zerolog.DurationFieldInteger = false
zerolog.DurationFieldUnit = time.Second
}
// Config configures the logger.
type Config struct { //nolint:govet // Don't care about alignment.
Level string `yaml:"level" json:"level" doc:"Log level. One of: trace, debug, info, warn, error, fatal, panic"`
Console bool `yaml:"console" json:"console" doc:"Logging includes terminal colors"`
}
// ConfigDefault returns the default values for a Config.
func ConfigDefault() Config {
return Config{
Level: zerolog.InfoLevel.String(),
Console: false,
}
}
// termOut returns a ConsoleWriter if we detect a tty or console config,
// otherwise returns os.Stdout since we're assuming we're running under docker.
func termOut(c *Config) io.Writer {
if c.Console || isatty.IsTerminal(os.Stdout.Fd()) {
return zerolog.ConsoleWriter{
Out: os.Stdout,
TimeFormat: "2006-01-02T15:04:05.000000", // Omitting timezone on console.
}
}
return os.Stdout
}
// New returns a new logger as described by the config.
// Panics in case of an invalid configuration.
func New(c *Config) (log zerolog.Logger) {
zLevel, err := zerolog.ParseLevel(c.Level)
if err != nil {
panic(err.Error())
}
log = zerolog.New(termOut(c)).
Level(zLevel).
With().Timestamp().Caller().
Logger()
return log
}
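A caller would typically start from ConfigDefault and override fields before constructing the logger, for example:

    cfg := logger.ConfigDefault()
    cfg.Level = "debug" // any zerolog level name listed in the doc tag
    log := logger.New(&cfg)
    log.Info().Str("component", "framer").Msg("logger ready")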

126
pkg/mimer/mimer.go Normal file
View File

@@ -0,0 +1,126 @@
// Frontline Perception System
// Copyright (C) 2020-2025 TurbineOne LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// Package mimer is a helper to determine the MIME type of a file.
package mimer
import (
"fmt"
"io"
"os"
"strings"
"github.com/aofei/mimesniffer"
)
// Media types recognized by this package.
const (
MediaTypeFuser = "application/vnd.turbineone.fuser"
MediaTypeMISBJSON = "application/vnd.misb0601+json"
MediaTypeNITF = "application/vnd.nitf"
MediaTypeJPEG = "image/jpeg"
MediaTypeJPEG2000 = "image/jp2"
MediaTypeM3U = "application/x-mpegurl"
UnknownMediaType = "application/octet-stream"
)
// isVideoTsSignature returns true if the given buffer looks like an MPEG-TS
// (video.ts) file. According to
// https://en.wikipedia.org/wiki/List_of_file_signatures,
// the sync byte 0x47 is the first byte of a video.ts file and repeats
// every 188 bytes.
func isVideoTsSignature(buffer []byte) bool {
const (
tsSignature = 0x47
tsSignatureInterval = 188
)
if len(buffer) < tsSignatureInterval {
return false
}
for i := 0; i < len(buffer); i += tsSignatureInterval {
if buffer[i] != tsSignature {
return false
}
}
return true
}
// isNITFSignature returns true if the buffer has a NITF file signature.
// Signature based on the NITF spec here, section 5.11.1:
// https://nsgreg.nga.mil/doc/view?i=5533&month=8&day=16&year=2024
func isNITFSignature(buffer []byte) bool {
const nitfSignature = "NITF02.10"
if len(buffer) < len(nitfSignature) {
return false
}
return strings.HasPrefix(string(buffer), nitfSignature)
}
// isM3USignature returns true if the buffer has an M3U playlist signature.
func isM3USignature(buffer []byte) bool {
const m3uSignature = "#EXTM3U"
if len(buffer) < len(m3uSignature) {
return false
}
return strings.HasPrefix(string(buffer), m3uSignature)
}
// init registers the custom signatures above with mimesniffer.
func init() {
mimesniffer.Register("video/mp2t", isVideoTsSignature)
mimesniffer.Register(MediaTypeNITF, isNITFSignature)
mimesniffer.Register(MediaTypeM3U, isM3USignature)
}
// GetContentTypeFromReader returns the content type sniffed from the first
// bytes of the given reader.
func GetContentTypeFromReader(reader io.Reader) (string, error) {
const fingerprintSize = 512
// Only the first 512 bytes are used to sniff the content type.
buffer := make([]byte, fingerprintSize)
n, err := reader.Read(buffer)
if n == 0 && err != nil {
return UnknownMediaType, fmt.Errorf("mime check failed read: %w", err)
}
// Sniff only the bytes actually read; zero padding from a short read
// could otherwise defeat the signature checks.
mimeType := mimesniffer.Sniff(buffer[:n])
return mimeType, nil
}
// GetContentType returns the content type of the file at the given path,
// falling back to UnknownMediaType if the file cannot be opened or read.
func GetContentType(sourcePath string) string {
f, err := os.Open(sourcePath)
if err != nil {
return UnknownMediaType
}
defer func() {
_ = f.Close()
}()
mimeType, _ := GetContentTypeFromReader(f)
return mimeType
}
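Callers can branch on the result; GetContentType never returns an error, falling back to UnknownMediaType when the file cannot be opened or recognized (the path here is illustrative):

    switch mt := mimer.GetContentType("/data/capture.ts"); mt {
    case "video/mp2t": // matched by the TS signature registered in init
        // handle MPEG-TS video
    case mimer.MediaTypeNITF:
        // handle NITF imagery
    case mimer.UnknownMediaType:
        // fall back to extension-based handling
    }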