/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H

#include <functional>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include "NvInfer.h"

#include "sampleOptions.h"
#include "sampleUtils.h"

namespace sample {

//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime {
  InferenceTime(float q, float i, float c, float o, float e)
      : enq(q), h2d(i), compute(c), d2h(o), e2e(e) {}

  InferenceTime() = default;
  InferenceTime(InferenceTime const&) = default;
  InferenceTime(InferenceTime&&) = default;
  InferenceTime& operator=(InferenceTime const&) = default;
  InferenceTime& operator=(InferenceTime&&) = default;
  ~InferenceTime() = default;

  float enq{0};     // Enqueue
  float h2d{0};     // Host to Device
  float compute{0}; // Compute
  float d2h{0};     // Device to Host
  float e2e{0};     // end to end

  // ideal latency
  float latency() const { return h2d + compute + d2h; }
};

//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace {
  InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs,
                 float ce, float os, float oe)
      : stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie),
        computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {}

  InferenceTrace() = default;
  InferenceTrace(InferenceTrace const&) = default;
  InferenceTrace(InferenceTrace&&) = default;
  InferenceTrace& operator=(InferenceTrace const&) = default;
  InferenceTrace& operator=(InferenceTrace&&) = default;
  ~InferenceTrace() = default;

  int32_t stream{0};
  float enqStart{0};
  float enqEnd{0};
  float h2dStart{0};
  float h2dEnd{0};
  float computeStart{0};
  float computeEnd{0};
  float d2hStart{0};
  float d2hEnd{0};
};
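
//!
//! Hypothetical sketch (not part of the original header) of how a trace of
//! timestamps can be reduced to the per-phase durations of InferenceTime; the
//! authoritative mapping, including the exact definition of e2e, lives in the
//! accompanying implementation (sampleReporting.cpp).
//! \code
//! inline InferenceTime traceToTiming(InferenceTrace const& t) {
//!   return InferenceTime(t.enqEnd - t.enqStart,        // enqueue
//!                        t.h2dEnd - t.h2dStart,         // host-to-device copy
//!                        t.computeEnd - t.computeStart, // kernel execution
//!                        t.d2hEnd - t.d2hStart,         // device-to-host copy
//!                        t.d2hEnd - t.h2dStart);        // end to end (assumed definition)
//! }
//! \endcode
//!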

inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) {
  return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute,
                       a.d2h + b.d2h, a.e2e + b.e2e);
}

inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) {
  return a = a + b;
}
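
//!
//! Usage sketch (illustrative only, not part of the original header): summing
//! per-iteration times with the operators above and reading the ideal latency
//! of the total.
//! \code
//! std::vector<InferenceTime> times = {{0.1F, 0.2F, 1.5F, 0.2F, 2.0F},
//!                                     {0.1F, 0.2F, 1.4F, 0.2F, 1.9F}};
//! InferenceTime total;
//! for (auto const& t : times) { total += t; }
//! float const ideal = total.latency(); // h2d + compute + d2h = 3.7 ms
//! \endcode
//!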

//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult {
  float min{0};
  float max{0};
  float mean{0};
  float median{0};
  float percentile{0};
  float coeffVar{0}; // coefficient of variation
};

//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
                 float walltime, std::ostream& os);

//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
                 std::ostream& os);

//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, float percentile,
                 int32_t batchSize, std::ostream& osInfo,
                 std::ostream& osWarning, std::ostream& osVerbose);

//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
                     std::function<float(InferenceTime const&)> metricGetter,
                     float percentile);
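
//!
//! Usage sketch (illustrative only): extracting the statistics of the compute
//! phase from a set of timings; the lambda selects which field of
//! InferenceTime is treated as the metric.
//! \code
//! std::vector<InferenceTime> times(10); // collected elsewhere
//! PerformanceResult const computeStats = getPerformanceResult(
//!     times, [](InferenceTime const& t) { return t.compute; }, 99.0F);
//! \endcode
//!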

//!
//! \brief Print the explanations of the performance metrics printed by
//! printEpilog().
//!
void printMetricExplanations(std::ostream& os);

//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
                            ReportingOptions const& reporting, float warmupMs,
                            int32_t batchSize, std::ostream& osInfo,
                            std::ostream& osWarning, std::ostream& osVerbose);

//!
//! \brief Export a timing trace to a JSON file
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
                     std::string const& fileName);

//!
//! \brief Print input tensors to a stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context,
                Bindings const& bindings, std::ostream& os);

//!
//! \brief Print output tensors to a stream
//!
void dumpOutputs(nvinfer1::IExecutionContext const& context,
                 Bindings const& bindings, std::ostream& os);

//!
//! \brief Export output tensors to a JSON file
//!
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
                      Bindings const& bindings, std::string const& fileName,
                      int32_t batch);

//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile {
  std::string name;
  float timeMs{0};
};

//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported
//! in the same order
//!
class Profiler : public nvinfer1::IProfiler {
 public:
  void reportLayerTime(char const* layerName, float timeMs) noexcept override;

  void print(std::ostream& os) const noexcept;

  //!
  //! \brief Export a profile to a JSON file
  //!
  void exportJSONProfile(std::string const& fileName) const noexcept;

 private:
  // Sum of the recorded per-layer times.
  float getTotalTime() const noexcept {
    auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
      return accumulator + lp.timeMs;
    };
    return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime);
  }

  std::vector<LayerProfile> mLayers;
  std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
  int32_t mUpdatesCount{0};
};
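
//!
//! Usage sketch (illustrative only; `context` is assumed to be a pointer to a
//! valid nvinfer1::IExecutionContext obtained elsewhere): per-layer times are
//! delivered through the IProfiler callback after each synchronized execution,
//! then printed or exported. The output file name is hypothetical.
//! \code
//! sample::Profiler profiler;
//! context->setProfiler(&profiler);
//! // ... run inference and synchronize ...
//! profiler.print(std::cout);
//! profiler.exportJSONProfile("profile.json");
//! \endcode
//!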

} // namespace sample

#endif // TRT_SAMPLE_REPORTING_H