mirror of
				https://github.com/PaddlePaddle/FastDeploy.git
				synced 2025-11-01 04:12:58 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			218 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			218 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm
 | |
| // -lrt -lcblas
 | |
| // possible options:
 | |
| //    -DEIGEN_DONT_VECTORIZE
 | |
| //    -msse2
 | |
| 
 | |
| // #define EIGEN_DEFAULT_TO_ROW_MAJOR
 | |
| #define _FLOAT
 | |
| 
 | |
| #include <iostream>
 | |
| 
 | |
| #include <Eigen/Core>
 | |
| #include "BenchTimer.h"
 | |
| 
 | |
| // include the BLAS headers
 | |
| extern "C" {
 | |
| #include <cblas.h>
 | |
| }
 | |
| #include <string>
 | |
| 
 | |
| #ifdef _FLOAT
 | |
| typedef float Scalar;
 | |
| #define CBLAS_GEMM cblas_sgemm
 | |
| #else
 | |
| typedef double Scalar;
 | |
| #define CBLAS_GEMM cblas_dgemm
 | |
| #endif
 | |
| 
 | |
| typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MyMatrix;
 | |
| void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb,
 | |
|                      int nbloops);
 | |
| void check_product(int M, int N, int K);
 | |
| void check_product(void);
 | |
| 
 | |
| int main(int argc, char* argv[]) {
 | |
| // disable SSE exceptions
 | |
| #ifdef __GNUC__
 | |
|   {
 | |
|     int aux;
 | |
|     asm("stmxcsr   %[aux]           \n\t"
 | |
|         "orl       $32832, %[aux]   \n\t"
 | |
|         "ldmxcsr   %[aux]           \n\t"
 | |
|         :
 | |
|         : [aux] "m"(aux));
 | |
|   }
 | |
| #endif
 | |
| 
 | |
|   int nbtries = 1, nbloops = 1, M, N, K;
 | |
| 
 | |
|   if (argc == 2) {
 | |
|     if (std::string(argv[1]) == "check")
 | |
|       check_product();
 | |
|     else
 | |
|       M = N = K = atoi(argv[1]);
 | |
|   } else if ((argc == 3) && (std::string(argv[1]) == "auto")) {
 | |
|     M = N = K = atoi(argv[2]);
 | |
|     nbloops = 1000000000 / (M * M * M);
 | |
|     if (nbloops < 1) nbloops = 1;
 | |
|     nbtries = 6;
 | |
|   } else if (argc == 4) {
 | |
|     M = N = K = atoi(argv[1]);
 | |
|     nbloops = atoi(argv[2]);
 | |
|     nbtries = atoi(argv[3]);
 | |
|   } else if (argc == 6) {
 | |
|     M = atoi(argv[1]);
 | |
|     N = atoi(argv[2]);
 | |
|     K = atoi(argv[3]);
 | |
|     nbloops = atoi(argv[4]);
 | |
|     nbtries = atoi(argv[5]);
 | |
|   } else {
 | |
|     std::cout << "Usage: " << argv[0] << " size  \n";
 | |
|     std::cout << "Usage: " << argv[0] << " auto size\n";
 | |
|     std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
 | |
|     std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
 | |
|     std::cout << "Usage: " << argv[0] << " check\n";
 | |
|     std::cout << "Options:\n";
 | |
|     std::cout << "    size       unique size of the 2 matrices (integer)\n";
 | |
|     std::cout << "    auto       automatically set the number of repetitions "
 | |
|                  "and tries\n";
 | |
|     std::cout
 | |
|         << "    nbloops    number of times the GEMM routines is executed\n";
 | |
|     std::cout << "    nbtries    number of times the loop is benched (return "
 | |
|                  "the best try)\n";
 | |
|     std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN "
 | |
|                  "(integers)\n";
 | |
|     std::cout
 | |
|         << "    check      check eigen product using cblas as a reference\n";
 | |
|     exit(1);
 | |
|   }
 | |
| 
 | |
|   double nbmad = double(M) * double(N) * double(K) * double(nbloops);
 | |
| 
 | |
|   if (!(std::string(argv[1]) == "auto"))
 | |
|     std::cout << M << " x " << N << " x " << K << "\n";
 | |
| 
 | |
|   Scalar alpha, beta;
 | |
|   MyMatrix ma(M, K), mb(K, N), mc(M, N);
 | |
|   ma = MyMatrix::Random(M, K);
 | |
|   mb = MyMatrix::Random(K, N);
 | |
|   mc = MyMatrix::Random(M, N);
 | |
| 
 | |
|   Eigen::BenchTimer timer;
 | |
| 
 | |
|   // we simply compute c += a*b, so:
 | |
|   alpha = 1;
 | |
|   beta = 1;
 | |
| 
 | |
|   // bench cblas
 | |
|   // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
 | |
|   if (!(std::string(argv[1]) == "auto")) {
 | |
|     timer.reset();
 | |
|     for (uint k = 0; k < nbtries; ++k) {
 | |
|       timer.start();
 | |
|       for (uint j = 0; j < nbloops; ++j)
 | |
| #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
 | |
|         CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha,
 | |
|                    ma.data(), K, mb.data(), N, beta, mc.data(), N);
 | |
| #else
 | |
|         CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha,
 | |
|                    ma.data(), M, mb.data(), K, beta, mc.data(), M);
 | |
| #endif
 | |
|       timer.stop();
 | |
|     }
 | |
|     if (!(std::string(argv[1]) == "auto"))
 | |
|       std::cout << "cblas: " << timer.value() << " ("
 | |
|                 << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
 | |
|     else
 | |
|       std::cout << M << " : " << timer.value() << " ; "
 | |
|                 << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
 | |
|   }
 | |
| 
 | |
|   // clear
 | |
|   ma = MyMatrix::Random(M, K);
 | |
|   mb = MyMatrix::Random(K, N);
 | |
|   mc = MyMatrix::Random(M, N);
 | |
| 
 | |
|   // eigen
 | |
|   //   if (!(std::string(argv[1])=="auto"))
 | |
|   {
 | |
|     timer.reset();
 | |
|     for (uint k = 0; k < nbtries; ++k) {
 | |
|       timer.start();
 | |
|       bench_eigengemm(mc, ma, mb, nbloops);
 | |
|       timer.stop();
 | |
|     }
 | |
|     if (!(std::string(argv[1]) == "auto"))
 | |
|       std::cout << "eigen : " << timer.value() << " ("
 | |
|                 << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
 | |
|     else
 | |
|       std::cout << M << " : " << timer.value() << " ; "
 | |
|                 << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
 | |
|   }
 | |
| 
 | |
|   std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
 | |
|   std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| using namespace Eigen;
 | |
| 
 | |
| void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb,
 | |
|                      int nbloops) {
 | |
|   for (uint j = 0; j < nbloops; ++j) mc.noalias() += ma * mb;
 | |
| }
 | |
| 
 | |
| #define MYVERIFY(A, M)                  \
 | |
|   if (!(A)) {                           \
 | |
|     std::cout << "FAIL: " << M << "\n"; \
 | |
|   }
 | |
| void check_product(int M, int N, int K) {
 | |
|   MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N),
 | |
|       mref(M, N);
 | |
|   ma = MyMatrix::Random(M, K);
 | |
|   mb = MyMatrix::Random(K, N);
 | |
|   maT = ma.transpose();
 | |
|   mbT = mb.transpose();
 | |
|   mc = MyMatrix::Random(M, N);
 | |
| 
 | |
|   MyMatrix::Scalar eps = 1e-4;
 | |
| 
 | |
|   meigen = mref = mc;
 | |
|   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(),
 | |
|              M, mb.data(), K, 1, mref.data(), M);
 | |
|   meigen += ma * mb;
 | |
|   MYVERIFY(meigen.isApprox(mref, eps), ". * .");
 | |
| 
 | |
|   meigen = mref = mc;
 | |
|   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K,
 | |
|              mb.data(), K, 1, mref.data(), M);
 | |
|   meigen += maT.transpose() * mb;
 | |
|   MYVERIFY(meigen.isApprox(mref, eps), "T * .");
 | |
| 
 | |
|   meigen = mref = mc;
 | |
|   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K,
 | |
|              mbT.data(), N, 1, mref.data(), M);
 | |
|   meigen += (maT.transpose()) * (mbT.transpose());
 | |
|   MYVERIFY(meigen.isApprox(mref, eps), "T * T");
 | |
| 
 | |
|   meigen = mref = mc;
 | |
|   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M,
 | |
|              mbT.data(), N, 1, mref.data(), M);
 | |
|   meigen += ma * mbT.transpose();
 | |
|   MYVERIFY(meigen.isApprox(mref, eps), ". * T");
 | |
| }
 | |
| 
 | |
| void check_product(void) {
 | |
|   int M, N, K;
 | |
|   for (uint i = 0; i < 1000; ++i) {
 | |
|     M = internal::random<int>(1, 64);
 | |
|     N = internal::random<int>(1, 768);
 | |
|     K = internal::random<int>(1, 768);
 | |
|     M = (0 + M) * 1;
 | |
|     std::cout << M << " x " << N << " x " << K << "\n";
 | |
|     check_product(M, N, K);
 | |
|   }
 | |
| }
 | 
