[Diffusion] Add C++ dpm solver (#714)

* Add BetaForAlphaBar, ConvertModelOutput, SetTimesteps, and constructor for DPMSolverMultistepScheduler

* tmp

* Add DPMSolverFirstOrderUpdate

* Add ScaleModelInput

* Add MultiStepDPMSolverSecondOrderUpdate

* Add MultiStepDPMSolverThirdOrderUpdate

* Add Step

* Add FASTDEPLOY_DECL

* Add AddNoise

* Fix operator

* update

* Fix DPMSolverMultistepScheduler

* Upgrade Slice

* Fix DPMSolverFirstOrderUpdate

* remove FASTDEPLOY_DECL

* Add config for dpm solver
This commit is contained in:
Jack Zhou
2022-11-30 13:41:22 +08:00
committed by GitHub
parent 3f8ed9bfee
commit d95094cfe5
14 changed files with 675 additions and 11 deletions

View File

@@ -39,14 +39,15 @@ void ClipKernel(const FDTensor& x, double min, double max, FDTensor* out) {
"max should be greater than or equal to min. But received min = %f, "
"max = %f",
static_cast<float>(min_), static_cast<float>(max_));
out->Allocate(x.Shape(), x.Dtype());
FDTensor tmp;
tmp.Allocate(x.Shape(), x.Dtype());
const T* x_data = reinterpret_cast<const T*>(x.Data());
int64_t numel = x.Numel();
T* out_data = reinterpret_cast<T*>(out->Data());
T* out_data = reinterpret_cast<T*>(tmp.Data());
std::transform(x_data, x_data + numel, out_data, ClipFunctor<T>(min_, max_));
*out = std::move(tmp);
}
void Clip(const FDTensor& x, double min, double max, FDTensor* out) {

View File

@@ -86,4 +86,25 @@ FDTensor operator/(const FDTensor& x, const FDTensor& y) {
return out;
}
// Emits the out-of-line definitions of the templated FDTensor-scalar
// arithmetic operators (declared in the elementwise header) for every
// supported scalar type, on both sides of the operator.
// `operation_type` is the operator token (+, -, *, /) pasted verbatim into
// each signature; the final line deliberately omits the ';' so the macro is
// invoked as `INSTANTIATE_OPERATOR(+);`.
#define INSTANTIATE_OPERATOR(operation_type) \
template FDTensor operator operation_type(const FDTensor& x, bool y); \
template FDTensor operator operation_type(const FDTensor& x, uint8_t y); \
template FDTensor operator operation_type(const FDTensor& x, int16_t y); \
template FDTensor operator operation_type(const FDTensor& x, int y); \
template FDTensor operator operation_type(const FDTensor& x, int64_t y); \
template FDTensor operator operation_type(const FDTensor& x, float y); \
template FDTensor operator operation_type(const FDTensor& x, double y); \
template FDTensor operator operation_type(bool x, const FDTensor& y); \
template FDTensor operator operation_type(uint8_t x, const FDTensor& y); \
template FDTensor operator operation_type(int16_t x, const FDTensor& y); \
template FDTensor operator operation_type(int x, const FDTensor& y); \
template FDTensor operator operation_type(int64_t x, const FDTensor& y); \
template FDTensor operator operation_type(float x, const FDTensor& y); \
template FDTensor operator operation_type(double x, const FDTensor& y)
// Instantiate all four elementwise arithmetic operators.
INSTANTIATE_OPERATOR(+);
INSTANTIATE_OPERATOR(-);
INSTANTIATE_OPERATOR(*);
INSTANTIATE_OPERATOR(/);
} // namespace fastdeploy

View File

@@ -14,9 +14,11 @@
#pragma once
#include "fastdeploy/core/fd_scalar.h"
#include "fastdeploy/core/fd_tensor.h"
namespace fastdeploy {
namespace function {
/** Execute the add operation for input FDTensors. *out = x + y.
@@ -62,10 +64,42 @@ FASTDEPLOY_DECL void Maximum(const FDTensor& x, const FDTensor& y,
FASTDEPLOY_DECL FDTensor operator+(const FDTensor& x, const FDTensor& y);
/// Elementwise add with a scalar on the right: result = x + y.
template <typename T> FDTensor operator+(const FDTensor& x, T y) {
  FDTensor rhs(Scalar(y));
  return x + rhs;
}
/// Elementwise add with a scalar on the left: result = x + y.
template <typename T> FDTensor operator+(T x, const FDTensor& y) {
  FDTensor lhs(Scalar(x));
  return lhs + y;
}
FASTDEPLOY_DECL FDTensor operator-(const FDTensor& x, const FDTensor& y);
/// Elementwise subtract with a scalar on the right: result = x - y.
template <typename T> FDTensor operator-(const FDTensor& x, T y) {
  FDTensor rhs(Scalar(y));
  return x - rhs;
}
/// Elementwise subtract with a scalar on the left: result = x - y.
template <typename T> FDTensor operator-(T x, const FDTensor& y) {
  FDTensor lhs(Scalar(x));
  return lhs - y;
}
FASTDEPLOY_DECL FDTensor operator*(const FDTensor& x, const FDTensor& y);
/// Elementwise multiply with a scalar on the right: result = x * y.
template <typename T> FDTensor operator*(const FDTensor& x, T y) {
  FDTensor rhs(Scalar(y));
  return x * rhs;
}
/// Elementwise multiply with a scalar on the left: result = x * y.
template <typename T> FDTensor operator*(T x, const FDTensor& y) {
  FDTensor lhs(Scalar(x));
  return lhs * y;
}
FASTDEPLOY_DECL FDTensor operator/(const FDTensor& x, const FDTensor& y);
/// Elementwise divide with a scalar on the right: result = x / y.
template <typename T> FDTensor operator/(const FDTensor& x, T y) {
  FDTensor rhs(Scalar(y));
  return x / rhs;
}
/// Elementwise divide with a scalar on the left: result = x / y.
template <typename T> FDTensor operator/(T x, const FDTensor& y) {
  FDTensor lhs(Scalar(x));
  return lhs / y;
}
} // namespace fastdeploy

View File

@@ -213,10 +213,12 @@ void CommonElementwiseBroadcastForward(const FDTensor& x, const FDTensor& y,
GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
y_dims_array.data(), out_dims_array.data(), max_dim,
axis);
z->Allocate(out_dims_array, TypeToDataType<OutType>::dtype);
FDTensor tmp;
tmp.Allocate(out_dims_array, TypeToDataType<OutType>::dtype);
CommonForwardBroadcastCPU<Functor, T, OutType>(
x, y, z, x_dims_array.data(), y_dims_array.data(), out_dims_array.data(),
max_dim, func, is_xsize_larger);
x, y, &tmp, x_dims_array.data(), y_dims_array.data(),
out_dims_array.data(), max_dim, func, is_xsize_larger);
*z = std::move(tmp);
}
template <typename Functor, typename T, typename OutType = T>

View File

@@ -163,5 +163,20 @@ void Slice(const FDTensor& x, const std::vector<int64_t>& axes,
}));
}
void Slice(const FDTensor& x, const std::vector<int64_t>& axes,
const std::vector<int64_t>& index, FDTensor* out) {
std::vector<int64_t> ends = index;
for (int i = 0; i < ends.size(); ++i) {
ends[i] += 1;
}
Slice(x, axes, index, ends, out);
for (int i = 0; i < axes.size(); ++i) {
if (out->Shape().size() <= 1) {
break;
}
out->Squeeze(axes[i]);
}
}
} // namespace function
} // namespace fastdeploy

View File

@@ -37,5 +37,8 @@ FASTDEPLOY_DECL void Slice(const FDTensor& x, const std::vector<int64_t>& axes,
const std::vector<int64_t>& starts,
const std::vector<int64_t>& ends, FDTensor* out);
FASTDEPLOY_DECL void Slice(const FDTensor& x, const std::vector<int64_t>& axes,
const std::vector<int64_t>& index, FDTensor* out);
} // namespace function
} // namespace fastdeploy