diff --git a/blas/.travis.yml b/blas/.travis.yml new file mode 100644 index 00000000..c1472485 --- /dev/null +++ b/blas/.travis.yml @@ -0,0 +1,46 @@ +sudo: required + +language: go + +# Versions of go that are explicitly supported by gonum. +go: + - 1.5.4 + - 1.6.3 + - 1.7.3 + +os: + - linux + - osx + +env: + matrix: + - BLAS_LIB=OpenBLAS + - BLAS_LIB=gonum + - BLAS_LIB=Accelerate + - BLAS_LIB=ATLAS + +matrix: + exclude: + - os: linux + env: BLAS_LIB=Accelerate + - os: linux + env: BLAS_LIB=ATLAS + - os: osx + env: BLAS_LIB=ATLAS + - os: osx + env: BLAS_LIB=OpenBLAS + +# Cache used to persist the compiled BLAS library between CI calls. +cache: + directories: + - .travis/OpenBLAS.cache + +# Install the appropriate blas library (if any) and associated gonum software. +install: + - source ${TRAVIS_BUILD_DIR}/.travis/$TRAVIS_OS_NAME/$BLAS_LIB/install.sh + +script: + - source ${TRAVIS_BUILD_DIR}/.travis/$TRAVIS_OS_NAME/$BLAS_LIB/test.sh + - test -z "$(gofmt -d .)" + # This is run last since it alters the tree. 
+ - ${TRAVIS_BUILD_DIR}/.travis/check-generate.sh diff --git a/blas/.travis/check-generate.sh b/blas/.travis/check-generate.sh new file mode 100755 index 00000000..167c98bf --- /dev/null +++ b/blas/.travis/check-generate.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ex + +go generate github.com/gonum/blas/native +go generate github.com/gonum/blas/cgo +if [ -n "$(git diff)" ]; then + exit 1 +fi diff --git a/blas/.travis/linux/ATLAS/install.sh b/blas/.travis/linux/ATLAS/install.sh new file mode 100644 index 00000000..923dcf5e --- /dev/null +++ b/blas/.travis/linux/ATLAS/install.sh @@ -0,0 +1,21 @@ +set -ex + +# fetch and install ATLAS libs +sudo apt-get update -qq && sudo apt-get install -qq libatlas-base-dev + +# fetch and install gonum/blas against ATLAS +export CGO_LDFLAGS="-L/usr/lib -lblas" +go get github.com/gonum/blas + +# run the OS common installation script +source ${TRAVIS_BUILD_DIR}/.travis/$TRAVIS_OS_NAME/install.sh + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. +# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/linux/ATLAS/test.sh b/blas/.travis/linux/ATLAS/test.sh new file mode 100644 index 00000000..37dc167d --- /dev/null +++ b/blas/.travis/linux/ATLAS/test.sh @@ -0,0 +1,17 @@ +set -ex + +go env +go get -d -t -v ./... +go test -a -v ./... +go test -a -tags noasm -v ./... +if [[ $TRAVIS_SECURE_ENV_VARS = "true" ]]; then bash -c "$GOPATH/src/github.com/$TRAVIS_REPO_SLUG/.travis/test-coverage.sh"; fi + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. 
Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. +# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/linux/OpenBLAS/install.sh b/blas/.travis/linux/OpenBLAS/install.sh new file mode 100644 index 00000000..97c4ace3 --- /dev/null +++ b/blas/.travis/linux/OpenBLAS/install.sh @@ -0,0 +1,71 @@ +set -ex + +CACHE_DIR=${TRAVIS_BUILD_DIR}/.travis/${BLAS_LIB}.cache + +# fetch fortran to build OpenBLAS +sudo apt-get update -qq && sudo apt-get install -qq gfortran + +# check if cache exists +if [ -e ${CACHE_DIR}/last_commit_id ]; then + echo "Cache $CACHE_DIR hit" + LAST_COMMIT="$(git ls-remote git://github.com/xianyi/OpenBLAS HEAD | grep -o '^\S*')" + CACHED_COMMIT="$(cat ${CACHE_DIR}/last_commit_id)" + # determine current OpenBLAS master commit id and compare + # with commit id in cache directory + if [ "$LAST_COMMIT" != "$CACHED_COMMIT" ]; then + echo "Cache Directory $CACHE_DIR has stale commit" + # if commit is different, delete the cache + rm -rf ${CACHE_DIR} + fi +fi + +if [ ! -e ${CACHE_DIR}/last_commit_id ]; then + # Clear cache. + rm -rf ${CACHE_DIR} + + # cache generation + echo "Building cache at $CACHE_DIR" + mkdir ${CACHE_DIR} + sudo git clone --depth=1 git://github.com/xianyi/OpenBLAS + + pushd OpenBLAS + sudo make FC=gfortran &> /dev/null && sudo make PREFIX=${CACHE_DIR} install + popd + + curl http://www.netlib.org/blas/blast-forum/cblas.tgz | tar -zx + + pushd CBLAS + sudo mv Makefile.LINUX Makefile.in + sudo BLLIB=${CACHE_DIR}/lib/libopenblas.a make alllib + sudo mv lib/cblas_LINUX.a ${CACHE_DIR}/lib/libcblas.a + popd + + # Record commit id used to generate cache. 
+ pushd OpenBLAS + echo $(git rev-parse HEAD) > ${CACHE_DIR}/last_commit_id + popd + +fi + +# copy the cache files into /usr +sudo cp -r ${CACHE_DIR}/* /usr/ + +# install gonum/blas against OpenBLAS +export CGO_LDFLAGS="-L/usr/lib -lopenblas" +go get github.com/gonum/blas +pushd cgo +go install -v -x +popd + +# run the OS common installation script +source ${TRAVIS_BUILD_DIR}/.travis/$TRAVIS_OS_NAME/install.sh + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. +# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/linux/OpenBLAS/test.sh b/blas/.travis/linux/OpenBLAS/test.sh new file mode 100644 index 00000000..37dc167d --- /dev/null +++ b/blas/.travis/linux/OpenBLAS/test.sh @@ -0,0 +1,17 @@ +set -ex + +go env +go get -d -t -v ./... +go test -a -v ./... +go test -a -tags noasm -v ./... +if [[ $TRAVIS_SECURE_ENV_VARS = "true" ]]; then bash -c "$GOPATH/src/github.com/$TRAVIS_REPO_SLUG/.travis/test-coverage.sh"; fi + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. 
+# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/linux/gonum/install.sh b/blas/.travis/linux/gonum/install.sh new file mode 100644 index 00000000..266fa0ca --- /dev/null +++ b/blas/.travis/linux/gonum/install.sh @@ -0,0 +1,18 @@ +set -ex + +# run the OS common installation script +source ${TRAVIS_BUILD_DIR}/.travis/$TRAVIS_OS_NAME/install.sh + +# change to native directory so we don't test code that depends on an external +# blas library +cd native + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. +# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/linux/gonum/test.sh b/blas/.travis/linux/gonum/test.sh new file mode 100644 index 00000000..37dc167d --- /dev/null +++ b/blas/.travis/linux/gonum/test.sh @@ -0,0 +1,17 @@ +set -ex + +go env +go get -d -t -v ./... +go test -a -v ./... +go test -a -tags noasm -v ./... +if [[ $TRAVIS_SECURE_ENV_VARS = "true" ]]; then bash -c "$GOPATH/src/github.com/$TRAVIS_REPO_SLUG/.travis/test-coverage.sh"; fi + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. 
+# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/linux/install.sh b/blas/.travis/linux/install.sh new file mode 100644 index 00000000..842385a0 --- /dev/null +++ b/blas/.travis/linux/install.sh @@ -0,0 +1,21 @@ +set -ex + +# This script contains common installation commands for linux. It should be run +# prior to more specific installation commands for a particular blas library. +go get golang.org/x/tools/cmd/cover +go get github.com/mattn/goveralls +go get github.com/gonum/floats + +# Repositories for code generation. +go get github.com/gonum/internal/binding +go get github.com/cznic/cc + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. +# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/osx/Accelerate/install.sh b/blas/.travis/osx/Accelerate/install.sh new file mode 100644 index 00000000..1dd9a1d6 --- /dev/null +++ b/blas/.travis/osx/Accelerate/install.sh @@ -0,0 +1,20 @@ +set -ex + +export CGO_LDFLAGS="-framework Accelerate" +go get github.com/gonum/blas +pushd cgo +go install -v -x +popd + +# run the OS common installation script +source ${TRAVIS_BUILD_DIR}/.travis/$TRAVIS_OS_NAME/install.sh + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. 
+# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/osx/Accelerate/test.sh b/blas/.travis/osx/Accelerate/test.sh new file mode 100644 index 00000000..37dc167d --- /dev/null +++ b/blas/.travis/osx/Accelerate/test.sh @@ -0,0 +1,17 @@ +set -ex + +go env +go get -d -t -v ./... +go test -a -v ./... +go test -a -tags noasm -v ./... +if [[ $TRAVIS_SECURE_ENV_VARS = "true" ]]; then bash -c "$GOPATH/src/github.com/$TRAVIS_REPO_SLUG/.travis/test-coverage.sh"; fi + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. +# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/osx/OpenBLAS/install.sh b/blas/.travis/osx/OpenBLAS/install.sh new file mode 100644 index 00000000..5b60bdc3 --- /dev/null +++ b/blas/.travis/osx/OpenBLAS/install.sh @@ -0,0 +1,24 @@ +set -ex + +# fetch and install OpenBLAS using homebrew +brew install homebrew/science/openblas + +# fetch and install gonum/blas against OpenBLAS +export CGO_LDFLAGS="-L/usr/local/opt/openblas/lib -lopenblas" +go get github.com/gonum/blas +pushd cgo +go install -v -x +popd + +# run the OS common installation script +source ${TRAVIS_BUILD_DIR}/.travis/$TRAVIS_OS_NAME/install.sh + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. 
+# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/osx/OpenBLAS/test.sh b/blas/.travis/osx/OpenBLAS/test.sh new file mode 100644 index 00000000..37dc167d --- /dev/null +++ b/blas/.travis/osx/OpenBLAS/test.sh @@ -0,0 +1,17 @@ +set -ex + +go env +go get -d -t -v ./... +go test -a -v ./... +go test -a -tags noasm -v ./... +if [[ $TRAVIS_SECURE_ENV_VARS = "true" ]]; then bash -c "$GOPATH/src/github.com/$TRAVIS_REPO_SLUG/.travis/test-coverage.sh"; fi + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. +# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/osx/gonum/install.sh b/blas/.travis/osx/gonum/install.sh new file mode 100644 index 00000000..266fa0ca --- /dev/null +++ b/blas/.travis/osx/gonum/install.sh @@ -0,0 +1,18 @@ +set -ex + +# run the OS common installation script +source ${TRAVIS_BUILD_DIR}/.travis/$TRAVIS_OS_NAME/install.sh + +# change to native directory so we don't test code that depends on an external +# blas library +cd native + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. 
+# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/osx/gonum/test.sh b/blas/.travis/osx/gonum/test.sh new file mode 100644 index 00000000..37dc167d --- /dev/null +++ b/blas/.travis/osx/gonum/test.sh @@ -0,0 +1,17 @@ +set -ex + +go env +go get -d -t -v ./... +go test -a -v ./... +go test -a -tags noasm -v ./... +if [[ $TRAVIS_SECURE_ENV_VARS = "true" ]]; then bash -c "$GOPATH/src/github.com/$TRAVIS_REPO_SLUG/.travis/test-coverage.sh"; fi + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. +# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/osx/install.sh b/blas/.travis/osx/install.sh new file mode 100644 index 00000000..0403e7c8 --- /dev/null +++ b/blas/.travis/osx/install.sh @@ -0,0 +1,23 @@ +set -ex + +#!/bin/bash + +# This script contains common installation commands for osx. It should be run +# prior to more specific installation commands for a particular blas library. +go get golang.org/x/tools/cmd/cover +go get github.com/mattn/goveralls +go get github.com/gonum/floats + +# Repositories for code generation. +go get github.com/gonum/internal/binding +go get github.com/cznic/cc + +# travis compiles commands in script and then executes in bash. By adding +# set -e we are changing the travis build script's behavior, and the set +# -e lives on past the commands we are providing it. Some of the travis +# commands are supposed to exit with non zero status, but then continue +# executing. set -x makes the travis log files extremely verbose and +# difficult to understand. 
+# +# see travis-ci/travis-ci#5120 +set +ex diff --git a/blas/.travis/test-coverage.sh b/blas/.travis/test-coverage.sh new file mode 100755 index 00000000..7df8aa6a --- /dev/null +++ b/blas/.travis/test-coverage.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +PROFILE_OUT=$PWD/profile.out +ACC_OUT=$PWD/acc.out + +testCover() { + # set the return value to 0 (successful) + retval=0 + # get the directory to check from the parameter. Default to '.' + d=${1:-.} + # skip if there are no Go files here + ls $d/*.go &> /dev/null || return $retval + # switch to the directory to check + pushd $d > /dev/null + # create the coverage profile + coverageresult=`go test -v -coverprofile=$PROFILE_OUT` + # output the result so we can check the shell output + echo ${coverageresult} + # set retval to 1 on failure, else append the results to acc.out; no subshell is used so the assignment reaches the return below + if [[ ${coverageresult} == *FAIL* ]]; then retval=1; elif [ -f $PROFILE_OUT ]; then grep -v "mode: set" $PROFILE_OUT >> $ACC_OUT; fi + # return to our working dir + popd > /dev/null + # return our return value + return $retval +} + +# Init acc.out +echo "mode: set" > $ACC_OUT + +# Run test coverage on all directories containing go files +find .
-maxdepth 10 -type d | while read d; do testCover $d || exit; done + +# Upload the coverage profile to coveralls.io +[ -n "$COVERALLS_TOKEN" ] && goveralls -coverprofile=$ACC_OUT -service=travis-ci -repotoken $COVERALLS_TOKEN + diff --git a/blas/README.md b/blas/README.md new file mode 100644 index 00000000..5e5cf886 --- /dev/null +++ b/blas/README.md @@ -0,0 +1,96 @@ +# Gonum BLAS [![Build Status](https://travis-ci.org/gonum/blas.svg?branch=master)](https://travis-ci.org/gonum/blas) [![Coverage Status](https://coveralls.io/repos/gonum/blas/badge.svg?branch=master&service=github)](https://coveralls.io/github/gonum/blas?branch=master) [![GoDoc](https://godoc.org/github.com/gonum/blas?status.svg)](https://godoc.org/github.com/gonum/blas) + +A collection of packages to provide BLAS functionality for the [Go programming +language](http://golang.org) + +## Installation +```sh + go get github.com/gonum/blas +``` + +### BLAS C-bindings + +If you want to use OpenBLAS, install it in any directory: +```sh + git clone https://github.com/xianyi/OpenBLAS + cd OpenBLAS + make +``` + +The blas/cgo package provides bindings to C-backed BLAS packages. blas/cgo needs the `CGO_LDFLAGS` +environment variable to point to the blas installation. More information can be found in the +[cgo command documentation](http://golang.org/cmd/cgo/). + +Then install the blas/cgo package: +```sh + CGO_LDFLAGS="-L/path/to/OpenBLAS -lopenblas" go install github.com/gonum/blas/cgo +``` + +For Windows you can download binary packages for OpenBLAS at +[SourceForge](http://sourceforge.net/projects/openblas/files/). 
+ +If you want to use a different BLAS package such as the Intel MKL you can +adjust the `CGO_LDFLAGS` variable: +```sh + CGO_LDFLAGS="-lmkl_rt" go install github.com/gonum/blas/cgo +``` + +On OS X the easiest solution is to use the libraries provided by the system: +```sh + CGO_LDFLAGS="-framework Accelerate" go install github.com/gonum/blas/cgo +``` + +## Packages + +### blas + +Defines the [BLAS API](http://www.netlib.org/blas/blast-forum/cinterface.pdf) split into several +interfaces. + +### blas/native + +Go implementation of the BLAS API (incomplete, implements the `float32` and `float64` API) + +### blas/cgo + +Binding to a C implementation of the cblas interface (e.g. ATLAS, OpenBLAS, Intel MKL) + +The recommended (free) option for good performance on both Linux and Darwin is OpenBLAS. + +### blas/blas64 and blas/blas32 + +Wrappers for an implementation of the double (i.e., `float64`) and single (`float32`) +precision real parts of the BLAS API + +```Go +package main + +import ( + "fmt" + + "github.com/gonum/blas/blas64" +) + +func main() { + v := blas64.Vector{Inc: 1, Data: []float64{1, 1, 1}} + fmt.Println("v has length:", blas64.Nrm2(len(v.Data), v)) +} +``` + +### blas/cblas128 and blas/cblas64 + +Wrappers for an implementation of the double (i.e., `complex128`) and single (`complex64`) +precision complex parts of the BLAS API + +Currently blas/cblas64 and blas/cblas128 require blas/cgo. + +## Issues + +If you find any bugs, feel free to file an issue on the GitHub issue tracker. +Discussions on API changes, added features, code review, or similar requests +are preferred on the [gonum-dev Google Group](https://groups.google.com/forum/#!forum/gonum-dev). + +## License + +Please see [github.com/gonum/license](https://github.com/gonum/license) for general +license information, contributors, authors, etc. on the Gonum suite of packages.
diff --git a/blas/blas.go b/blas/blas.go new file mode 100644 index 00000000..6c14aac4 --- /dev/null +++ b/blas/blas.go @@ -0,0 +1,388 @@ +// Copyright ©2013 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package blas provides interfaces for the BLAS linear algebra standard. + +All methods must perform appropriate parameter checking and panic if +provided parameters that do not conform to the requirements specified +by the BLAS standard. + +Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html + +This version is modified to remove the "order" option. All matrix operations are +on row-order matrices. + +Level 1 BLAS + + dim scalar vector vector scalars 5-element prefixes + struct + + _rotg ( a, b ) S, D + _rotmg( d1, d2, a, b ) S, D + _rot ( n, x, incX, y, incY, c, s ) S, D + _rotm ( n, x, incX, y, incY, param ) S, D + _swap ( n, x, incX, y, incY ) S, D, C, Z + _scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd + _copy ( n, x, incX, y, incY ) S, D, C, Z + _axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z + _dot ( n, x, incX, y, incY ) S, D, Ds + _dotu ( n, x, incX, y, incY ) C, Z + _dotc ( n, x, incX, y, incY ) C, Z + __dot ( n, alpha, x, incX, y, incY ) Sds + _nrm2 ( n, x, incX ) S, D, Sc, Dz + _asum ( n, x, incX ) S, D, Sc, Dz + I_amax( n, x, incX ) s, d, c, z + +Level 2 BLAS + + options dim b-width scalar matrix vector scalar vector prefixes + + _gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z + _gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z + _hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z + _hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z + _hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z + _symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D + _sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D + _spmv ( uplo, n, 
alpha, ap, x, incX, beta, y, incY ) S, D + _trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z + _tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z + _tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z + _trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z + _tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z + _tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z + + options dim scalar vector vector matrix prefixes + + _ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D + _geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z + _gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z + _her ( uplo, n, alpha, x, incX, a, lda ) C, Z + _hpr ( uplo, n, alpha, x, incX, ap ) C, Z + _her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z + _hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z + _syr ( uplo, n, alpha, x, incX, a, lda ) S, D + _spr ( uplo, n, alpha, x, incX, ap ) S, D + _syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D + _spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D + +Level 3 BLAS + + options dim scalar matrix matrix scalar matrix prefixes + + _gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z + _symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z + _hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z + _syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z + _herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z + _syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z + _her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z + _trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z + _trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z + +Meaning of prefixes + + S - float32 C - complex64 + D - float64 Z - complex128 + +Matrix types + + GE - GEneral GB - General Band + SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed + HE - HErmitian HB - 
Hermitian Band HP - Hermitian Packed + TR - TRiangular TB - Triangular Band TP - Triangular Packed + +Options + + trans = NoTrans, Trans, ConjTrans + uplo = Upper, Lower + diag = Nonunit, Unit + side = Left, Right (A or op(A) on the left, or A or op(A) on the right) + +For real matrices, Trans and ConjTrans have the same meaning. +For Hermitian matrices, trans = Trans is not allowed. +For complex symmetric matrices, trans = ConjTrans is not allowed. +*/ +package blas + +// Flag constants indicate Givens transformation H matrix state. +type Flag int + +const ( + Identity Flag = iota - 2 // H is the identity matrix; no rotation is needed. + Rescaling // H specifies rescaling. + OffDiagonal // Off-diagonal elements of H are units. + Diagonal // Diagonal elements of H are units. +) + +// SrotmParams contains Givens transformation parameters returned +// by the Float32 Srotmg method. +type SrotmParams struct { + Flag + H [4]float32 // Column-major 2 by 2 matrix. +} + +// DrotmParams contains Givens transformation parameters returned +// by the Float64 Drotmg method. +type DrotmParams struct { + Flag + H [4]float64 // Column-major 2 by 2 matrix. +} + +// Transpose is used to specify the transposition operation for a +// routine. +type Transpose int + +const ( + NoTrans Transpose = 111 + iota + Trans + ConjTrans +) + +// Uplo is used to specify whether the matrix is an upper or lower +// triangular matrix. +type Uplo int + +const ( + All Uplo = 120 + iota + Upper + Lower +) + +// Diag is used to specify whether the matrix is a unit or non-unit +// triangular matrix. +type Diag int + +const ( + NonUnit Diag = 131 + iota + Unit +) + +// Side is used to specify from which side a multiplication operation +// is performed. +type Side int + +const ( + Left Side = 141 + iota + Right +) + +// Float32 implements the single precision real BLAS routines.
+type Float32 interface { + Float32Level1 + Float32Level2 + Float32Level3 +} + +// Float32Level1 implements the single precision real BLAS Level 1 routines. +type Float32Level1 interface { + Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32 + Dsdot(n int, x []float32, incX int, y []float32, incY int) float64 + Sdot(n int, x []float32, incX int, y []float32, incY int) float32 + Snrm2(n int, x []float32, incX int) float32 + Sasum(n int, x []float32, incX int) float32 + Isamax(n int, x []float32, incX int) int + Sswap(n int, x []float32, incX int, y []float32, incY int) + Scopy(n int, x []float32, incX int, y []float32, incY int) + Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int) + Srotg(a, b float32) (c, s, r, z float32) + Srotmg(d1, d2, b1, b2 float32) (p SrotmParams, rd1, rd2, rb1 float32) + Srot(n int, x []float32, incX int, y []float32, incY int, c, s float32) + Srotm(n int, x []float32, incX int, y []float32, incY int, p SrotmParams) + Sscal(n int, alpha float32, x []float32, incX int) +} + +// Float32Level2 implements the single precision real BLAS Level 2 routines. 
+type Float32Level2 interface { + Sgemv(tA Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) + Sgbmv(tA Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) + Strmv(ul Uplo, tA Transpose, d Diag, n int, a []float32, lda int, x []float32, incX int) + Stbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []float32, lda int, x []float32, incX int) + Stpmv(ul Uplo, tA Transpose, d Diag, n int, ap []float32, x []float32, incX int) + Strsv(ul Uplo, tA Transpose, d Diag, n int, a []float32, lda int, x []float32, incX int) + Stbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []float32, lda int, x []float32, incX int) + Stpsv(ul Uplo, tA Transpose, d Diag, n int, ap []float32, x []float32, incX int) + Ssymv(ul Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) + Ssbmv(ul Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) + Sspmv(ul Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int) + Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) + Ssyr(ul Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) + Sspr(ul Uplo, n int, alpha float32, x []float32, incX int, ap []float32) + Ssyr2(ul Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) + Sspr2(ul Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) +} + +// Float32Level3 implements the single precision real BLAS Level 3 routines.
+type Float32Level3 interface { + Sgemm(tA, tB Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) + Ssymm(s Side, ul Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) + Ssyrk(ul Uplo, t Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) + Ssyr2k(ul Uplo, t Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) + Strmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) + Strsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) +} + +// Float64 implements the double precision real BLAS routines. +type Float64 interface { + Float64Level1 + Float64Level2 + Float64Level3 +} + +// Float64Level1 implements the double precision real BLAS Level 1 routines. +type Float64Level1 interface { + Ddot(n int, x []float64, incX int, y []float64, incY int) float64 + Dnrm2(n int, x []float64, incX int) float64 + Dasum(n int, x []float64, incX int) float64 + Idamax(n int, x []float64, incX int) int + Dswap(n int, x []float64, incX int, y []float64, incY int) + Dcopy(n int, x []float64, incX int, y []float64, incY int) + Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) + Drotg(a, b float64) (c, s, r, z float64) + Drotmg(d1, d2, b1, b2 float64) (p DrotmParams, rd1, rd2, rb1 float64) + Drot(n int, x []float64, incX int, y []float64, incY int, c float64, s float64) + Drotm(n int, x []float64, incX int, y []float64, incY int, p DrotmParams) + Dscal(n int, alpha float64, x []float64, incX int) +} + +// Float64Level2 implements the double precision real BLAS Level 2 routines.
+type Float64Level2 interface { + Dgemv(tA Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) + Dgbmv(tA Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) + Dtrmv(ul Uplo, tA Transpose, d Diag, n int, a []float64, lda int, x []float64, incX int) + Dtbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []float64, lda int, x []float64, incX int) + Dtpmv(ul Uplo, tA Transpose, d Diag, n int, ap []float64, x []float64, incX int) + Dtrsv(ul Uplo, tA Transpose, d Diag, n int, a []float64, lda int, x []float64, incX int) + Dtbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []float64, lda int, x []float64, incX int) + Dtpsv(ul Uplo, tA Transpose, d Diag, n int, ap []float64, x []float64, incX int) + Dsymv(ul Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) + Dsbmv(ul Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) + Dspmv(ul Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int) + Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) + Dsyr(ul Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) + Dspr(ul Uplo, n int, alpha float64, x []float64, incX int, ap []float64) + Dsyr2(ul Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) + Dspr2(ul Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, ap []float64) +} + +// Float64Level3 implements the double precision real BLAS Level 3 routines.
+type Float64Level3 interface { + Dgemm(tA, tB Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) + Dsymm(s Side, ul Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) + Dsyrk(ul Uplo, t Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) + Dsyr2k(ul Uplo, t Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) + Dtrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) + Dtrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) +} + +// Complex64 implements the single precision complex BLAS routines. +type Complex64 interface { + Complex64Level1 + Complex64Level2 + Complex64Level3 +} + +// Complex64Level1 implements the single precision complex BLAS Level 1 routines. +type Complex64Level1 interface { + Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64) + Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64) + Scnrm2(n int, x []complex64, incX int) float32 + Scasum(n int, x []complex64, incX int) float32 + Icamax(n int, x []complex64, incX int) int + Cswap(n int, x []complex64, incX int, y []complex64, incY int) + Ccopy(n int, x []complex64, incX int, y []complex64, incY int) + Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) + Cscal(n int, alpha complex64, x []complex64, incX int) + Csscal(n int, alpha float32, x []complex64, incX int) +} + +// Complex64Level2 implements the single precision complex BLAS routines Level 2 routines. 
+type Complex64Level2 interface { + Cgemv(tA Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) + Cgbmv(tA Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) + Ctrmv(ul Uplo, tA Transpose, d Diag, n int, a []complex64, lda int, x []complex64, incX int) + Ctbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex64, lda int, x []complex64, incX int) + Ctpmv(ul Uplo, tA Transpose, d Diag, n int, ap []complex64, x []complex64, incX int) + Ctrsv(ul Uplo, tA Transpose, d Diag, n int, a []complex64, lda int, x []complex64, incX int) + Ctbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex64, lda int, x []complex64, incX int) + Ctpsv(ul Uplo, tA Transpose, d Diag, n int, ap []complex64, x []complex64, incX int) + Chemv(ul Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) + Chbmv(ul Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) + Chpmv(ul Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int) + Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) + Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) + Cher(ul Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int) + Chpr(ul Uplo, n int, alpha float32, x []complex64, incX int, a []complex64) + Cher2(ul Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) + Chpr2(ul Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64) +} + +// Complex64Level3 implements the single precision complex BLAS Level 3 routines. 
+type Complex64Level3 interface { + Cgemm(tA, tB Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) + Csymm(s Side, ul Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) + Csyrk(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) + Csyr2k(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) + Ctrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) + Ctrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) + Chemm(s Side, ul Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) + Cherk(ul Uplo, t Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) + Cher2k(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) +} + +// Complex128 implements the double precision complex BLAS routines. +type Complex128 interface { + Complex128Level1 + Complex128Level2 + Complex128Level3 +} + +// Complex128Level1 implements the double precision complex BLAS Level 1 routines. 
+type Complex128Level1 interface { + Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128) + Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128) + Dznrm2(n int, x []complex128, incX int) float64 + Dzasum(n int, x []complex128, incX int) float64 + Izamax(n int, x []complex128, incX int) int + Zswap(n int, x []complex128, incX int, y []complex128, incY int) + Zcopy(n int, x []complex128, incX int, y []complex128, incY int) + Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) + Zscal(n int, alpha complex128, x []complex128, incX int) + Zdscal(n int, alpha float64, x []complex128, incX int) +} + +// Complex128Level2 implements the double precision complex BLAS Level 2 routines. +type Complex128Level2 interface { + Zgemv(tA Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) + Zgbmv(tA Transpose, m, n int, kL int, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) + Ztrmv(ul Uplo, tA Transpose, d Diag, n int, a []complex128, lda int, x []complex128, incX int) + Ztbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex128, lda int, x []complex128, incX int) + Ztpmv(ul Uplo, tA Transpose, d Diag, n int, ap []complex128, x []complex128, incX int) + Ztrsv(ul Uplo, tA Transpose, d Diag, n int, a []complex128, lda int, x []complex128, incX int) + Ztbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex128, lda int, x []complex128, incX int) + Ztpsv(ul Uplo, tA Transpose, d Diag, n int, ap []complex128, x []complex128, incX int) + Zhemv(ul Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) + Zhbmv(ul Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) + Zhpmv(ul Uplo, n int, alpha
complex128, ap []complex128, x []complex128, incX int, beta complex128, y []complex128, incY int) + Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) + Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) + Zher(ul Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int) + Zhpr(ul Uplo, n int, alpha float64, x []complex128, incX int, a []complex128) + Zher2(ul Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) + Zhpr2(ul Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, ap []complex128) +} + +// Complex128Level3 implements the double precision complex BLAS Level 3 routines. +type Complex128Level3 interface { + Zgemm(tA, tB Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) + Zsymm(s Side, ul Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) + Zsyrk(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) + Zsyr2k(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) + Ztrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) + Ztrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) + Zhemm(s Side, ul Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) + Zherk(ul Uplo, t Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) + Zher2k(ul Uplo, t Transpose, n, k int, alpha complex128, a
[]complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) +} diff --git a/blas/blas32/blas32.go b/blas/blas32/blas32.go new file mode 100644 index 00000000..82971ea1 --- /dev/null +++ b/blas/blas32/blas32.go @@ -0,0 +1,458 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package blas32 provides a simple interface to the float32 BLAS API. +package blas32 + +import ( + "github.com/gonum/blas" + "github.com/gonum/blas/native" +) + +var blas32 blas.Float32 = native.Implementation{} + +// Use sets the BLAS float32 implementation to be used by subsequent BLAS calls. +// The default implementation is native.Implementation. +func Use(b blas.Float32) { + blas32 = b +} + +// Implementation returns the current BLAS float32 implementation. +// +// Implementation allows direct calls to the current BLAS float32 implementation +// giving finer control of parameters. +func Implementation() blas.Float32 { + return blas32 +} + +// Vector represents a vector with an associated element increment. +type Vector struct { + Inc int + Data []float32 +} + +// General represents a matrix using the conventional storage scheme. +type General struct { + Rows, Cols int + Stride int + Data []float32 +} + +// Band represents a band matrix using the band storage scheme. +type Band struct { + Rows, Cols int + KL, KU int + Stride int + Data []float32 +} + +// Triangular represents a triangular matrix using the conventional storage scheme. +type Triangular struct { + N int + Stride int + Data []float32 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularBand represents a triangular matrix using the band storage scheme. +type TriangularBand struct { + N, K int + Stride int + Data []float32 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularPacked represents a triangular matrix using the packed storage scheme.
+type TriangularPacked struct { + N int + Data []float32 + Uplo blas.Uplo + Diag blas.Diag +} + +// Symmetric represents a symmetric matrix using the conventional storage scheme. +type Symmetric struct { + N int + Stride int + Data []float32 + Uplo blas.Uplo +} + +// SymmetricBand represents a symmetric matrix using the band storage scheme. +type SymmetricBand struct { + N, K int + Stride int + Data []float32 + Uplo blas.Uplo +} + +// SymmetricPacked represents a symmetric matrix using the packed storage scheme. +type SymmetricPacked struct { + N int + Data []float32 + Uplo blas.Uplo +} + +// Level 1 + +const negInc = "blas32: negative vector increment" + +// Dot computes the dot product of the two vectors: +// \sum_i x[i]*y[i]. +func Dot(n int, x, y Vector) float32 { + return blas32.Sdot(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// DDot computes the dot product of the two float32 vectors, returning a float64: +// \sum_i x[i]*y[i]. +func DDot(n int, x, y Vector) float64 { + return blas32.Dsdot(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// SDDot computes the dot product of the two vectors adding a constant: +// alpha + \sum_i x[i]*y[i]. +func SDDot(n int, alpha float32, x, y Vector) float32 { + return blas32.Sdsdot(n, alpha, x.Data, x.Inc, y.Data, y.Inc) +} + +// Nrm2 computes the Euclidean norm of the vector x: +// sqrt(\sum_i x[i]*x[i]). +// +// Nrm2 will panic if the vector increment is negative. +func Nrm2(n int, x Vector) float32 { + if x.Inc < 0 { + panic(negInc) + } + return blas32.Snrm2(n, x.Data, x.Inc) +} + +// Asum computes the sum of the absolute values of the elements of x: +// \sum_i |x[i]|. +// +// Asum will panic if the vector increment is negative. +func Asum(n int, x Vector) float32 { + if x.Inc < 0 { + panic(negInc) + } + return blas32.Sasum(n, x.Data, x.Inc) +} + +// Iamax returns the index of an element of x with the largest absolute value. +// If there are multiple such indices the earliest is returned. +// Iamax returns -1 if n == 0.
+// +// Iamax will panic if the vector increment is negative. +func Iamax(n int, x Vector) int { + if x.Inc < 0 { + panic(negInc) + } + return blas32.Isamax(n, x.Data, x.Inc) +} + +// Swap exchanges the elements of the two vectors: +// x[i], y[i] = y[i], x[i] for all i. +func Swap(n int, x, y Vector) { + blas32.Sswap(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Copy copies the elements of x into the elements of y: +// y[i] = x[i] for all i. +func Copy(n int, x, y Vector) { + blas32.Scopy(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Axpy adds x scaled by alpha to y: +// y[i] += alpha*x[i] for all i. +func Axpy(n int, alpha float32, x, y Vector) { + blas32.Saxpy(n, alpha, x.Data, x.Inc, y.Data, y.Inc) +} + +// Rotg computes the parameters of a Givens plane rotation so that +// ⎡ c s⎤ ⎡a⎤ ⎡r⎤ +// ⎣-s c⎦ * ⎣b⎦ = ⎣0⎦ +// where a and b are the Cartesian coordinates of a given point. +// c, s, and r are defined as +// r = ±Sqrt(a^2 + b^2), +// c = a/r, the cosine of the rotation angle, +// s = b/r, the sine of the rotation angle, +// and z is defined such that +// if |a| > |b|, z = s, +// otherwise if c != 0, z = 1/c, +// otherwise z = 1. +func Rotg(a, b float32) (c, s, r, z float32) { + return blas32.Srotg(a, b) +} + +// Rotmg computes the modified Givens rotation. See +// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html +// for more details. +func Rotmg(d1, d2, b1, b2 float32) (p blas.SrotmParams, rd1, rd2, rb1 float32) { + return blas32.Srotmg(d1, d2, b1, b2) +} + +// Rot applies a plane transformation to n points represented by the vectors x +// and y: +// x[i] = c*x[i] + s*y[i], +// y[i] = -s*x[i] + c*y[i], for all i. +func Rot(n int, x, y Vector, c, s float32) { + blas32.Srot(n, x.Data, x.Inc, y.Data, y.Inc, c, s) +} + +// Rotm applies the modified Givens rotation to n points represented by the +// vectors x and y.
+func Rotm(n int, x, y Vector, p blas.SrotmParams) { + blas32.Srotm(n, x.Data, x.Inc, y.Data, y.Inc, p) +} + +// Scal scales the vector x by alpha: +// x[i] *= alpha for all i. +// +// Scal will panic if the vector increment is negative. +func Scal(n int, alpha float32, x Vector) { + if x.Inc < 0 { + panic(negInc) + } + blas32.Sscal(n, alpha, x.Data, x.Inc) +} + +// Level 2 + +// Gemv computes +// y = alpha * A * x + beta * y, if t == blas.NoTrans, +// y = alpha * A^T * x + beta * y, if t == blas.Trans or blas.ConjTrans, +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. +func Gemv(t blas.Transpose, alpha float32, a General, x Vector, beta float32, y Vector) { + blas32.Sgemv(t, a.Rows, a.Cols, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Gbmv computes +// y = alpha * A * x + beta * y, if t == blas.NoTrans, +// y = alpha * A^T * x + beta * y, if t == blas.Trans or blas.ConjTrans, +// where A is an m×n band matrix, x and y are vectors, and alpha and beta are scalars. +func Gbmv(t blas.Transpose, alpha float32, a Band, x Vector, beta float32, y Vector) { + blas32.Sgbmv(t, a.Rows, a.Cols, a.KL, a.KU, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Trmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular matrix, and x is a vector. +func Trmv(t blas.Transpose, a Triangular, x Vector) { + blas32.Strmv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular band matrix, and x is a vector.
+func Tbmv(t blas.Transpose, a TriangularBand, x Vector) { + blas32.Stbmv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular matrix in packed format, and x is a vector. +func Tpmv(t blas.Transpose, a TriangularPacked, x Vector) { + blas32.Stpmv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Trsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Trsv(t blas.Transpose, a Triangular, x Vector) { + blas32.Strsv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular band matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Tbsv(t blas.Transpose, a TriangularBand, x Vector) { + blas32.Stbsv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular matrix in packed format, and x and b are +// vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine.
Such tests must be performed before calling this routine. +func Tpsv(t blas.Transpose, a TriangularPacked, x Vector) { + blas32.Stpsv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Symv computes +// y = alpha * A * x + beta * y, +// where A is an n×n symmetric matrix, x and y are vectors, and alpha and +// beta are scalars. +func Symv(alpha float32, a Symmetric, x Vector, beta float32, y Vector) { + blas32.Ssymv(a.Uplo, a.N, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Sbmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n symmetric band matrix, x and y are vectors, and alpha +// and beta are scalars. +func Sbmv(alpha float32, a SymmetricBand, x Vector, beta float32, y Vector) { + blas32.Ssbmv(a.Uplo, a.N, a.K, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Spmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha and beta are scalars. +func Spmv(alpha float32, a SymmetricPacked, x Vector, beta float32, y Vector) { + blas32.Sspmv(a.Uplo, a.N, alpha, a.Data, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Ger performs a rank-1 update +// A += alpha * x * y^T, +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func Ger(alpha float32, x, y Vector, a General) { + blas32.Sger(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Syr performs a rank-1 update +// A += alpha * x * x^T, +// where A is an n×n symmetric matrix, x is a vector, and alpha is a scalar. +func Syr(alpha float32, x Vector, a Symmetric) { + blas32.Ssyr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data, a.Stride) +} + +// Spr performs a rank-1 update +// A += alpha * x * x^T, +// where A is an n×n symmetric matrix in packed format, x is a vector, and +// alpha is a scalar.
+func Spr(alpha float32, x Vector, a SymmetricPacked) { + blas32.Sspr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data) +} + +// Syr2 performs a rank-2 update +// A += alpha * x * y^T + alpha * y * x^T, +// where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar. +func Syr2(alpha float32, x, y Vector, a Symmetric) { + blas32.Ssyr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Spr2 performs a rank-2 update +// A += alpha * x * y^T + alpha * y * x^T, +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha is a scalar. +func Spr2(alpha float32, x, y Vector, a SymmetricPacked) { + blas32.Sspr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data) +} + +// Level 3 + +// Gemm computes +// C = alpha * A * B + beta * C, +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed. +func Gemm(tA, tB blas.Transpose, alpha float32, a, b General, beta float32, c General) { + var m, n, k int + if tA == blas.NoTrans { + m, k = a.Rows, a.Cols + } else { + m, k = a.Cols, a.Rows + } + if tB == blas.NoTrans { + n = b.Cols + } else { + n = b.Rows + } + blas32.Sgemm(tA, tB, m, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Symm performs +// C = alpha * A * B + beta * C, if s == blas.Left, +// C = alpha * B * A + beta * C, if s == blas.Right, +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and +// alpha and beta are scalars.
+func Symm(s blas.Side, alpha float32, a Symmetric, b General, beta float32, c General) { + var m, n int + if s == blas.Left { + m, n = a.N, b.Cols + } else { + m, n = b.Rows, a.N + } + blas32.Ssymm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Syrk performs a symmetric rank-k update +// C = alpha * A * A^T + beta * C, if t == blas.NoTrans, +// C = alpha * A^T * A + beta * C, if t == blas.Trans or blas.ConjTrans, +// where C is an n×n symmetric matrix, A is an n×k matrix if t == blas.NoTrans and +// a k×n matrix otherwise, and alpha and beta are scalars. +func Syrk(t blas.Transpose, alpha float32, a General, beta float32, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + blas32.Ssyrk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride) +} + +// Syr2k performs a symmetric rank-2k update +// C = alpha * A * B^T + alpha * B * A^T + beta * C, if t == blas.NoTrans, +// C = alpha * A^T * B + alpha * B^T * A + beta * C, if t == blas.Trans or blas.ConjTrans, +// where C is an n×n symmetric matrix, A and B are n×k matrices if t == blas.NoTrans +// and k×n matrices otherwise, and alpha and beta are scalars. +func Syr2k(t blas.Transpose, alpha float32, a, b General, beta float32, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + blas32.Ssyr2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Trmm performs +// B = alpha * A * B, if tA == blas.NoTrans and s == blas.Left, +// B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Left, +// B = alpha * B * A, if tA == blas.NoTrans and s == blas.Right, +// B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and s == blas.Right, +// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is +// a scalar.
+func Trmm(s blas.Side, tA blas.Transpose, alpha float32, a Triangular, b General) { + blas32.Strmm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} + +// Trsm solves +// A * X = alpha * B, if tA == blas.NoTrans and s == blas.Left, +// A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Left, +// X * A = alpha * B, if tA == blas.NoTrans and s == blas.Right, +// X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Right, +// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and +// alpha is a scalar. +// +// At entry to the function, X contains the values of B, and the result is +// stored in-place into X. +// +// No check is made that A is invertible. +func Trsm(s blas.Side, tA blas.Transpose, alpha float32, a Triangular, b General) { + blas32.Strsm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} diff --git a/blas/blas64/blas64.go b/blas/blas64/blas64.go new file mode 100644 index 00000000..088c30e9 --- /dev/null +++ b/blas/blas64/blas64.go @@ -0,0 +1,446 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package blas64 provides a simple interface to the float64 BLAS API. +package blas64 + +import ( + "github.com/gonum/blas" + "github.com/gonum/blas/native" +) + +var blas64 blas.Float64 = native.Implementation{} + +// Use sets the BLAS float64 implementation to be used by subsequent BLAS calls. +// The default implementation is native.Implementation. +func Use(b blas.Float64) { + blas64 = b +} + +// Implementation returns the current BLAS float64 implementation. +// +// Implementation allows direct calls to the current BLAS float64 implementation +// giving finer control of parameters.
+func Implementation() blas.Float64 { + return blas64 +} + +// Vector represents a vector with an associated element increment. +type Vector struct { + Inc int + Data []float64 +} + +// General represents a matrix using the conventional storage scheme. +type General struct { + Rows, Cols int + Stride int + Data []float64 +} + +// Band represents a band matrix using the band storage scheme. +type Band struct { + Rows, Cols int + KL, KU int + Stride int + Data []float64 +} + +// Triangular represents a triangular matrix using the conventional storage scheme. +type Triangular struct { + N int + Stride int + Data []float64 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularBand represents a triangular matrix using the band storage scheme. +type TriangularBand struct { + N, K int + Stride int + Data []float64 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularPacked represents a triangular matrix using the packed storage scheme. +type TriangularPacked struct { + N int + Data []float64 + Uplo blas.Uplo + Diag blas.Diag +} + +// Symmetric represents a symmetric matrix using the conventional storage scheme. +type Symmetric struct { + N int + Stride int + Data []float64 + Uplo blas.Uplo +} + +// SymmetricBand represents a symmetric matrix using the band storage scheme. +type SymmetricBand struct { + N, K int + Stride int + Data []float64 + Uplo blas.Uplo +} + +// SymmetricPacked represents a symmetric matrix using the packed storage scheme. +type SymmetricPacked struct { + N int + Data []float64 + Uplo blas.Uplo +} + +// Level 1 + +const negInc = "blas64: negative vector increment" + +// Dot computes the dot product of the two vectors: +// \sum_i x[i]*y[i]. +func Dot(n int, x, y Vector) float64 { + return blas64.Ddot(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Nrm2 computes the Euclidean norm of the vector x: +// sqrt(\sum_i x[i]*x[i]). +// +// Nrm2 will panic if the vector increment is negative.
+func Nrm2(n int, x Vector) float64 { + if x.Inc < 0 { + panic(negInc) + } + return blas64.Dnrm2(n, x.Data, x.Inc) +} + +// Asum computes the sum of the absolute values of the elements of x: +// \sum_i |x[i]|. +// +// Asum will panic if the vector increment is negative. +func Asum(n int, x Vector) float64 { + if x.Inc < 0 { + panic(negInc) + } + return blas64.Dasum(n, x.Data, x.Inc) +} + +// Iamax returns the index of an element of x with the largest absolute value. +// If there are multiple such indices the earliest is returned. +// Iamax returns -1 if n == 0. +// +// Iamax will panic if the vector increment is negative. +func Iamax(n int, x Vector) int { + if x.Inc < 0 { + panic(negInc) + } + return blas64.Idamax(n, x.Data, x.Inc) +} + +// Swap exchanges the elements of the two vectors: +// x[i], y[i] = y[i], x[i] for all i. +func Swap(n int, x, y Vector) { + blas64.Dswap(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Copy copies the elements of x into the elements of y: +// y[i] = x[i] for all i. +func Copy(n int, x, y Vector) { + blas64.Dcopy(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Axpy adds x scaled by alpha to y: +// y[i] += alpha*x[i] for all i. +func Axpy(n int, alpha float64, x, y Vector) { + blas64.Daxpy(n, alpha, x.Data, x.Inc, y.Data, y.Inc) +} + +// Rotg computes the parameters of a Givens plane rotation so that +// ⎡ c s⎤ ⎡a⎤ ⎡r⎤ +// ⎣-s c⎦ * ⎣b⎦ = ⎣0⎦ +// where a and b are the Cartesian coordinates of a given point. +// c, s, and r are defined as +// r = ±Sqrt(a^2 + b^2), +// c = a/r, the cosine of the rotation angle, +// s = b/r, the sine of the rotation angle, +// and z is defined such that +// if |a| > |b|, z = s, +// otherwise if c != 0, z = 1/c, +// otherwise z = 1. +func Rotg(a, b float64) (c, s, r, z float64) { + return blas64.Drotg(a, b) +} + +// Rotmg computes the modified Givens rotation. See +// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html +// for more details.
+func Rotmg(d1, d2, b1, b2 float64) (p blas.DrotmParams, rd1, rd2, rb1 float64) { + return blas64.Drotmg(d1, d2, b1, b2) +} + +// Rot applies a plane transformation to n points represented by the vectors x +// and y: +// x[i] = c*x[i] + s*y[i], +// y[i] = -s*x[i] + c*y[i], for all i. +func Rot(n int, x, y Vector, c, s float64) { + blas64.Drot(n, x.Data, x.Inc, y.Data, y.Inc, c, s) +} + +// Rotm applies the modified Givens rotation to n points represented by the +// vectors x and y. +func Rotm(n int, x, y Vector, p blas.DrotmParams) { + blas64.Drotm(n, x.Data, x.Inc, y.Data, y.Inc, p) +} + +// Scal scales the vector x by alpha: +// x[i] *= alpha for all i. +// +// Scal will panic if the vector increment is negative. +func Scal(n int, alpha float64, x Vector) { + if x.Inc < 0 { + panic(negInc) + } + blas64.Dscal(n, alpha, x.Data, x.Inc) +} + +// Level 2 + +// Gemv computes +// y = alpha * A * x + beta * y, if t == blas.NoTrans, +// y = alpha * A^T * x + beta * y, if t == blas.Trans or blas.ConjTrans, +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. +func Gemv(t blas.Transpose, alpha float64, a General, x Vector, beta float64, y Vector) { + blas64.Dgemv(t, a.Rows, a.Cols, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Gbmv computes +// y = alpha * A * x + beta * y, if t == blas.NoTrans, +// y = alpha * A^T * x + beta * y, if t == blas.Trans or blas.ConjTrans, +// where A is an m×n band matrix, x and y are vectors, and alpha and beta are scalars. +func Gbmv(t blas.Transpose, alpha float64, a Band, x Vector, beta float64, y Vector) { + blas64.Dgbmv(t, a.Rows, a.Cols, a.KL, a.KU, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Trmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular matrix, and x is a vector.
+func Trmv(t blas.Transpose, a Triangular, x Vector) { + blas64.Dtrmv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular band matrix, and x is a vector. +func Tbmv(t blas.Transpose, a TriangularBand, x Vector) { + blas64.Dtbmv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular matrix in packed format, and x is a vector. +func Tpmv(t blas.Transpose, a TriangularPacked, x Vector) { + blas64.Dtpmv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Trsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Trsv(t blas.Transpose, a Triangular, x Vector) { + blas64.Dtrsv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular band matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine.
+func Tbsv(t blas.Transpose, a TriangularBand, x Vector) { + blas64.Dtbsv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans or blas.ConjTrans, +// where A is an n×n triangular matrix in packed format, and x and b are +// vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Tpsv(t blas.Transpose, a TriangularPacked, x Vector) { + blas64.Dtpsv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Symv computes +// y = alpha * A * x + beta * y, +// where A is an n×n symmetric matrix, x and y are vectors, and alpha and +// beta are scalars. +func Symv(alpha float64, a Symmetric, x Vector, beta float64, y Vector) { + blas64.Dsymv(a.Uplo, a.N, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Sbmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n symmetric band matrix, x and y are vectors, and alpha +// and beta are scalars. +func Sbmv(alpha float64, a SymmetricBand, x Vector, beta float64, y Vector) { + blas64.Dsbmv(a.Uplo, a.N, a.K, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Spmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha and beta are scalars. +func Spmv(alpha float64, a SymmetricPacked, x Vector, beta float64, y Vector) { + blas64.Dspmv(a.Uplo, a.N, alpha, a.Data, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Ger performs a rank-1 update +// A += alpha * x * y^T, +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
+func Ger(alpha float64, x, y Vector, a General) { + blas64.Dger(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Syr performs a rank-1 update +// A += alpha * x * x^T, +// where A is an n×n symmetric matrix, x is a vector, and alpha is a scalar. +func Syr(alpha float64, x Vector, a Symmetric) { + blas64.Dsyr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data, a.Stride) +} + +// Spr performs a rank-1 update +// A += alpha * x * x^T, +// where A is an n×n symmetric matrix in packed format, x is a vector, and +// alpha is a scalar. +func Spr(alpha float64, x Vector, a SymmetricPacked) { + blas64.Dspr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data) +} + +// Syr2 performs a rank-2 update +// A += alpha * x * y^T + alpha * y * x^T, +// where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar. +func Syr2(alpha float64, x, y Vector, a Symmetric) { + blas64.Dsyr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Spr2 performs a rank-2 update +// A += alpha * x * y^T + alpha * y * x^T, +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha is a scalar. +func Spr2(alpha float64, x, y Vector, a SymmetricPacked) { + blas64.Dspr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data) +} + +// Level 3 + +// Gemm computes +// C = alpha * A * B + beta * C, +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed. +// +// The dimensions m, n and k are taken from a and b according to tA and tB; +// c is not checked against them.
+func Gemm(tA, tB blas.Transpose, alpha float64, a, b General, beta float64, c General) { + var m, n, k int + if tA == blas.NoTrans { + m, k = a.Rows, a.Cols + } else { + m, k = a.Cols, a.Rows + } + if tB == blas.NoTrans { + n = b.Cols + } else { + n = b.Rows + } + blas64.Dgemm(tA, tB, m, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Symm performs +// C = alpha * A * B + beta * C, if s == blas.Left, +// C = alpha * B * A + beta * C, if s == blas.Right, +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and +// alpha and beta are scalars. +func Symm(s blas.Side, alpha float64, a Symmetric, b General, beta float64, c General) { + var m, n int + if s == blas.Left { + m, n = a.N, b.Cols + } else { + m, n = b.Rows, a.N + } + blas64.Dsymm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Syrk performs a symmetric rank-k update +// C = alpha * A * A^T + beta * C, if t == blas.NoTrans, +// C = alpha * A^T * A + beta * C, if t == blas.Trans or blas.ConjTrans, +// where C is an n×n symmetric matrix, A is an n×k matrix if t == blas.NoTrans and +// a k×n matrix otherwise, and alpha and beta are scalars. +func Syrk(t blas.Transpose, alpha float64, a General, beta float64, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + blas64.Dsyrk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride) +} + +// Syr2k performs a symmetric rank-2k update +// C = alpha * A * B^T + alpha * B * A^T + beta * C, if t == blas.NoTrans, +// C = alpha * A^T * B + alpha * B^T * A + beta * C, if t == blas.Trans or blas.ConjTrans, +// where C is an n×n symmetric matrix, A and B are n×k matrices if +// t == blas.NoTrans and k×n matrices otherwise, and alpha and beta are scalars. +// +// The dimensions n and k are taken from a; b and c are not checked against +// them.
+func Syr2k(t blas.Transpose, alpha float64, a, b General, beta float64, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + blas64.Dsyr2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Trmm performs +// B = alpha * A * B, if tA == blas.NoTrans and s == blas.Left, +// B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Left, +// B = alpha * B * A, if tA == blas.NoTrans and s == blas.Right, +// B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and s == blas.Right, +// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is +// a scalar. +func Trmm(s blas.Side, tA blas.Transpose, alpha float64, a Triangular, b General) { + blas64.Dtrmm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} + +// Trsm solves +// A * X = alpha * B, if tA == blas.NoTrans and s == blas.Left, +// A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Left, +// X * A = alpha * B, if tA == blas.NoTrans and s == blas.Right, +// X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Right, +// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and +// alpha is a scalar. +// +// At entry to the function, b contains the values of B, and the result is +// stored in-place into b. +// +// No check is made that A is invertible. +func Trsm(s blas.Side, tA blas.Transpose, alpha float64, a Triangular, b General) { + blas64.Dtrsm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} diff --git a/blas/cblas128/cblas128.go b/blas/cblas128/cblas128.go new file mode 100644 index 00000000..60be9dd7 --- /dev/null +++ b/blas/cblas128/cblas128.go @@ -0,0 +1,510 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +// Package cblas128 provides a simple interface to the complex128 BLAS API. +package cblas128 + +import ( + "github.com/gonum/blas" + "github.com/gonum/blas/cgo" +) + +// TODO(kortschak): Change this and the comment below to native.Implementation +// when blas/native covers the complex BLAS API. +var cblas128 blas.Complex128 = cgo.Implementation{} + +// Use sets the BLAS complex128 implementation to be used by subsequent BLAS calls. +// The default implementation is cgo.Implementation. +func Use(b blas.Complex128) { + cblas128 = b +} + +// Implementation returns the current BLAS complex128 implementation. +// +// Implementation allows direct calls to the current BLAS complex128 implementation +// giving finer control of parameters. +func Implementation() blas.Complex128 { + return cblas128 +} + +// Vector represents a vector with an associated element increment. +type Vector struct { + Inc int + Data []complex128 +} + +// General represents a matrix using the conventional storage scheme. +type General struct { + Rows, Cols int + Stride int + Data []complex128 +} + +// Band represents a band matrix using the band storage scheme. +type Band struct { + Rows, Cols int + KL, KU int + Stride int + Data []complex128 +} + +// Triangular represents a triangular matrix using the conventional storage scheme. +type Triangular struct { + N int + Stride int + Data []complex128 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularBand represents a triangular matrix using the band storage scheme. +type TriangularBand struct { + N, K int + Stride int + Data []complex128 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularPacked represents a triangular matrix using the packed storage scheme. +type TriangularPacked struct { + N int + Data []complex128 + Uplo blas.Uplo + Diag blas.Diag +} + +// Symmetric represents a symmetric matrix using the conventional storage scheme.
+type Symmetric struct { + N int + Stride int + Data []complex128 + Uplo blas.Uplo +} + +// SymmetricBand represents a symmetric matrix using the band storage scheme. +type SymmetricBand struct { + N, K int + Stride int + Data []complex128 + Uplo blas.Uplo +} + +// SymmetricPacked represents a symmetric matrix using the packed storage scheme. +type SymmetricPacked struct { + N int + Data []complex128 + Uplo blas.Uplo +} + +// Hermitian represents an Hermitian matrix using the conventional storage scheme. +type Hermitian Symmetric + +// HermitianBand represents an Hermitian matrix using the band storage scheme. +type HermitianBand SymmetricBand + +// HermitianPacked represents an Hermitian matrix using the packed storage scheme. +type HermitianPacked SymmetricPacked + +// Level 1 + +const negInc = "cblas128: negative vector increment" + +// Dotu computes the dot product of the two vectors without +// complex conjugation: +// x^T * y. +func Dotu(n int, x, y Vector) complex128 { + return cblas128.Zdotu(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Dotc computes the dot product of the two vectors with +// complex conjugation: +// x^H * y. +func Dotc(n int, x, y Vector) complex128 { + return cblas128.Zdotc(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Nrm2 computes the Euclidean norm of the vector x: +// sqrt(\sum_i x[i] * conj(x[i])). +// +// Nrm2 will panic if the vector increment is negative. +func Nrm2(n int, x Vector) float64 { + if x.Inc < 0 { + panic(negInc) + } + return cblas128.Dznrm2(n, x.Data, x.Inc) +} + +// Asum computes the sum of magnitudes of the real and imaginary parts of +// elements of the vector x: +// \sum_i (|Re x[i]| + |Im x[i]|). +// +// Asum will panic if the vector increment is negative. +func Asum(n int, x Vector) float64 { + if x.Inc < 0 { + panic(negInc) + } + return cblas128.Dzasum(n, x.Data, x.Inc) +} + +// Iamax returns the index of an element of x with the largest sum of +// magnitudes of the real and imaginary parts (|Re x[i]|+|Im x[i]|).
+// If there are multiple such indices, the earliest is returned. +// +// Iamax returns -1 if n == 0. +// +// Iamax will panic if the vector increment is negative. +func Iamax(n int, x Vector) int { + if x.Inc < 0 { + panic(negInc) + } + return cblas128.Izamax(n, x.Data, x.Inc) +} + +// Swap exchanges the elements of two vectors: +// x[i], y[i] = y[i], x[i] for all i. +// +// Swap itself does not check the sign of the vector increments; they are +// passed to the implementation unchanged. +func Swap(n int, x, y Vector) { + cblas128.Zswap(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Copy copies the elements of x into the elements of y: +// y[i] = x[i] for all i. +// +// Copy itself does not check the sign of the vector increments; they are +// passed to the implementation unchanged. +func Copy(n int, x, y Vector) { + cblas128.Zcopy(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Axpy computes +// y = alpha * x + y, +// where x and y are vectors, and alpha is a scalar. +// +// Axpy itself does not check the sign of the vector increments; they are +// passed to the implementation unchanged. +func Axpy(n int, alpha complex128, x, y Vector) { + cblas128.Zaxpy(n, alpha, x.Data, x.Inc, y.Data, y.Inc) +} + +// Scal computes +// x = alpha * x, +// where x is a vector, and alpha is a scalar. +// +// Scal will panic if the vector increment is negative. +func Scal(n int, alpha complex128, x Vector) { + if x.Inc < 0 { + panic(negInc) + } + cblas128.Zscal(n, alpha, x.Data, x.Inc) +} + +// Dscal computes +// x = alpha * x, +// where x is a vector, and alpha is a real scalar. +// +// Dscal will panic if the vector increment is negative. +func Dscal(n int, alpha float64, x Vector) { + if x.Inc < 0 { + panic(negInc) + } + cblas128.Zdscal(n, alpha, x.Data, x.Inc) +} + +// Level 2 + +// Gemv computes +// y = alpha * A * x + beta * y, if t == blas.NoTrans, +// y = alpha * A^T * x + beta * y, if t == blas.Trans, +// y = alpha * A^H * x + beta * y, if t == blas.ConjTrans, +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are +// scalars.
+func Gemv(t blas.Transpose, alpha complex128, a General, x Vector, beta complex128, y Vector) { + cblas128.Zgemv(t, a.Rows, a.Cols, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Gbmv computes +// y = alpha * A * x + beta * y, if t == blas.NoTrans, +// y = alpha * A^T * x + beta * y, if t == blas.Trans, +// y = alpha * A^H * x + beta * y, if t == blas.ConjTrans, +// where A is an m×n band matrix, x and y are vectors, and alpha and beta are +// scalars. +func Gbmv(t blas.Transpose, alpha complex128, a Band, x Vector, beta complex128, y Vector) { + cblas128.Zgbmv(t, a.Rows, a.Cols, a.KL, a.KU, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Trmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans, +// x = A^H * x, if t == blas.ConjTrans, +// where A is an n×n triangular matrix, and x is a vector. +func Trmv(t blas.Transpose, a Triangular, x Vector) { + cblas128.Ztrmv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans, +// x = A^H * x, if t == blas.ConjTrans, +// where A is an n×n triangular band matrix, and x is a vector. +func Tbmv(t blas.Transpose, a TriangularBand, x Vector) { + cblas128.Ztbmv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans, +// x = A^H * x, if t == blas.ConjTrans, +// where A is an n×n triangular matrix in packed format, and x is a vector. +func Tpmv(t blas.Transpose, a TriangularPacked, x Vector) { + cblas128.Ztpmv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Trsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans, +// A^H * x = b, if t == blas.ConjTrans, +// where A is an n×n triangular matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x.
+// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Trsv(t blas.Transpose, a Triangular, x Vector) { + cblas128.Ztrsv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans, +// A^H * x = b, if t == blas.ConjTrans, +// where A is an n×n triangular band matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Tbsv(t blas.Transpose, a TriangularBand, x Vector) { + cblas128.Ztbsv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans, +// A^H * x = b, if t == blas.ConjTrans, +// where A is an n×n triangular matrix in packed format, and x and b are +// vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Tpsv(t blas.Transpose, a TriangularPacked, x Vector) { + cblas128.Ztpsv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Hemv computes +// y = alpha * A * x + beta * y, +// where A is an n×n Hermitian matrix, x and y are vectors, and alpha and +// beta are scalars. +func Hemv(alpha complex128, a Hermitian, x Vector, beta complex128, y Vector) { + cblas128.Zhemv(a.Uplo, a.N, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Hbmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n Hermitian band matrix, x and y are vectors, and alpha +// and beta are scalars.
+func Hbmv(alpha complex128, a HermitianBand, x Vector, beta complex128, y Vector) { + cblas128.Zhbmv(a.Uplo, a.N, a.K, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Hpmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n Hermitian matrix in packed format, x and y are vectors, +// and alpha and beta are scalars. +func Hpmv(alpha complex128, a HermitianPacked, x Vector, beta complex128, y Vector) { + cblas128.Zhpmv(a.Uplo, a.N, alpha, a.Data, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Geru performs a rank-1 update +// A += alpha * x * y^T, +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func Geru(alpha complex128, x, y Vector, a General) { + cblas128.Zgeru(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Gerc performs a rank-1 update +// A += alpha * x * y^H, +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func Gerc(alpha complex128, x, y Vector, a General) { + cblas128.Zgerc(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Her performs a rank-1 update +// A += alpha * x * x^H, +// where A is an n×n Hermitian matrix, x is a vector, and alpha is a real scalar. +func Her(alpha float64, x Vector, a Hermitian) { + cblas128.Zher(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data, a.Stride) +} + +// Hpr performs a rank-1 update +// A += alpha * x * x^H, +// where A is an n×n Hermitian matrix in packed format, x is a vector, and +// alpha is a scalar. +func Hpr(alpha float64, x Vector, a HermitianPacked) { + cblas128.Zhpr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data) +} + +// Her2 performs a rank-2 update +// A += alpha * x * y^H + conj(alpha) * y * x^H, +// where A is an n×n Hermitian matrix, x and y are vectors, and alpha is a scalar.
+func Her2(alpha complex128, x, y Vector, a Hermitian) { + cblas128.Zher2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride) +} + +// Hpr2 performs a rank-2 update +// A += alpha * x * y^H + conj(alpha) * y * x^H, +// where A is an n×n Hermitian matrix in packed format, x and y are vectors, +// and alpha is a scalar. +func Hpr2(alpha complex128, x, y Vector, a HermitianPacked) { + cblas128.Zhpr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data) +} + +// Level 3 + +// Gemm computes +// C = alpha * A * B + beta * C, +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed or conjugated. +// +// The dimensions m, n and k are taken from a and b according to tA and tB; +// c is not checked against them. +func Gemm(tA, tB blas.Transpose, alpha complex128, a, b General, beta complex128, c General) { + var m, n, k int + if tA == blas.NoTrans { + m, k = a.Rows, a.Cols + } else { + m, k = a.Cols, a.Rows + } + if tB == blas.NoTrans { + n = b.Cols + } else { + n = b.Rows + } + cblas128.Zgemm(tA, tB, m, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Symm performs +// C = alpha * A * B + beta * C, if s == blas.Left, +// C = alpha * B * A + beta * C, if s == blas.Right, +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and +// alpha and beta are scalars. +func Symm(s blas.Side, alpha complex128, a Symmetric, b General, beta complex128, c General) { + var m, n int + if s == blas.Left { + m, n = a.N, b.Cols + } else { + m, n = b.Rows, a.N + } + cblas128.Zsymm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Syrk performs a symmetric rank-k update +// C = alpha * A * A^T + beta * C, if t == blas.NoTrans, +// C = alpha * A^T * A + beta * C, if t == blas.Trans, +// where C is an n×n symmetric matrix, A is an n×k matrix if t == blas.NoTrans +// and a k×n matrix otherwise, and alpha and beta are scalars.
+func Syrk(t blas.Transpose, alpha complex128, a General, beta complex128, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas128.Zsyrk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride) +} + +// Syr2k performs a symmetric rank-2k update +// C = alpha * A * B^T + alpha * B * A^T + beta * C, if t == blas.NoTrans, +// C = alpha * A^T * B + alpha * B^T * A + beta * C, if t == blas.Trans, +// where C is an n×n symmetric matrix, A and B are n×k matrices if +// t == blas.NoTrans and k×n otherwise, and alpha and beta are scalars. +// +// The dimensions n and k are taken from a; b and c are not checked against +// them. +func Syr2k(t blas.Transpose, alpha complex128, a, b General, beta complex128, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas128.Zsyr2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Trmm performs +// B = alpha * A * B, if tA == blas.NoTrans and s == blas.Left, +// B = alpha * A^T * B, if tA == blas.Trans and s == blas.Left, +// B = alpha * A^H * B, if tA == blas.ConjTrans and s == blas.Left, +// B = alpha * B * A, if tA == blas.NoTrans and s == blas.Right, +// B = alpha * B * A^T, if tA == blas.Trans and s == blas.Right, +// B = alpha * B * A^H, if tA == blas.ConjTrans and s == blas.Right, +// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is +// a scalar.
+func Trmm(s blas.Side, tA blas.Transpose, alpha complex128, a Triangular, b General) { + cblas128.Ztrmm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} + +// Trsm solves +// A * X = alpha * B, if tA == blas.NoTrans and s == blas.Left, +// A^T * X = alpha * B, if tA == blas.Trans and s == blas.Left, +// A^H * X = alpha * B, if tA == blas.ConjTrans and s == blas.Left, +// X * A = alpha * B, if tA == blas.NoTrans and s == blas.Right, +// X * A^T = alpha * B, if tA == blas.Trans and s == blas.Right, +// X * A^H = alpha * B, if tA == blas.ConjTrans and s == blas.Right, +// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and +// alpha is a scalar. +// +// At entry to the function, b contains the values of B, and the result is +// stored in-place into b. +// +// No check is made that A is invertible. +func Trsm(s blas.Side, tA blas.Transpose, alpha complex128, a Triangular, b General) { + cblas128.Ztrsm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} + +// Hemm performs +// C = alpha * A * B + beta * C, if s == blas.Left, +// C = alpha * B * A + beta * C, if s == blas.Right, +// where A is an n×n or m×m Hermitian matrix, B and C are m×n matrices, and +// alpha and beta are scalars. +func Hemm(s blas.Side, alpha complex128, a Hermitian, b General, beta complex128, c General) { + var m, n int + if s == blas.Left { + m, n = a.N, b.Cols + } else { + m, n = b.Rows, a.N + } + cblas128.Zhemm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Herk performs the Hermitian rank-k update +// C = alpha * A * A^H + beta * C, if t == blas.NoTrans, +// C = alpha * A^H * A + beta * C, if t == blas.ConjTrans, +// where C is an n×n Hermitian matrix, A is an n×k matrix if t == blas.NoTrans +// and a k×n matrix otherwise, and alpha and beta are scalars.
+func Herk(t blas.Transpose, alpha float64, a General, beta float64, c Hermitian) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas128.Zherk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride) +} + +// Her2k performs the Hermitian rank-2k update +// C = alpha * A * B^H + conj(alpha) * B * A^H + beta * C, if t == blas.NoTrans, +// C = alpha * A^H * B + conj(alpha) * B^H * A + beta * C, if t == blas.ConjTrans, +// where C is an n×n Hermitian matrix, A and B are n×k matrices if +// t == blas.NoTrans and k×n matrices otherwise, and alpha and beta are scalars. +// +// The dimensions n and k are taken from a; b and c are not checked against +// them. +func Her2k(t blas.Transpose, alpha complex128, a, b General, beta float64, c Hermitian) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas128.Zher2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} diff --git a/blas/cblas64/cblas64.go b/blas/cblas64/cblas64.go new file mode 100644 index 00000000..08c7738c --- /dev/null +++ b/blas/cblas64/cblas64.go @@ -0,0 +1,510 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cblas64 provides a simple interface to the complex64 BLAS API. +package cblas64 + +import ( + "github.com/gonum/blas" + "github.com/gonum/blas/cgo" +) + +// TODO(kortschak): Change this and the comment below to native.Implementation +// when blas/native covers the complex BLAS API. +var cblas64 blas.Complex64 = cgo.Implementation{} + +// Use sets the BLAS complex64 implementation to be used by subsequent BLAS calls. +// The default implementation is cgo.Implementation. +func Use(b blas.Complex64) { + cblas64 = b +} + +// Implementation returns the current BLAS complex64 implementation.
+// +// Implementation allows direct calls to the current BLAS complex64 implementation +// giving finer control of parameters. +func Implementation() blas.Complex64 { + return cblas64 +} + +// Vector represents a vector with an associated element increment. +type Vector struct { + Inc int + Data []complex64 +} + +// General represents a matrix using the conventional storage scheme. +type General struct { + Rows, Cols int + Stride int + Data []complex64 +} + +// Band represents a band matrix using the band storage scheme. +type Band struct { + Rows, Cols int + KL, KU int + Stride int + Data []complex64 +} + +// Triangular represents a triangular matrix using the conventional storage scheme. +type Triangular struct { + N int + Stride int + Data []complex64 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularBand represents a triangular matrix using the band storage scheme. +type TriangularBand struct { + N, K int + Stride int + Data []complex64 + Uplo blas.Uplo + Diag blas.Diag +} + +// TriangularPacked represents a triangular matrix using the packed storage scheme. +type TriangularPacked struct { + N int + Data []complex64 + Uplo blas.Uplo + Diag blas.Diag +} + +// Symmetric represents a symmetric matrix using the conventional storage scheme. +type Symmetric struct { + N int + Stride int + Data []complex64 + Uplo blas.Uplo +} + +// SymmetricBand represents a symmetric matrix using the band storage scheme. +type SymmetricBand struct { + N, K int + Stride int + Data []complex64 + Uplo blas.Uplo +} + +// SymmetricPacked represents a symmetric matrix using the packed storage scheme. +type SymmetricPacked struct { + N int + Data []complex64 + Uplo blas.Uplo +} + +// Hermitian represents an Hermitian matrix using the conventional storage scheme. +type Hermitian Symmetric + +// HermitianBand represents an Hermitian matrix using the band storage scheme.
+type HermitianBand SymmetricBand + +// HermitianPacked represents an Hermitian matrix using the packed storage scheme. +type HermitianPacked SymmetricPacked + +// Level 1 + +const negInc = "cblas64: negative vector increment" + +// Dotu computes the dot product of the two vectors without +// complex conjugation: +// x^T * y. +func Dotu(n int, x, y Vector) complex64 { + return cblas64.Cdotu(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Dotc computes the dot product of the two vectors with +// complex conjugation: +// x^H * y. +func Dotc(n int, x, y Vector) complex64 { + return cblas64.Cdotc(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Nrm2 computes the Euclidean norm of the vector x: +// sqrt(\sum_i x[i] * conj(x[i])). +// +// Nrm2 will panic if the vector increment is negative. +func Nrm2(n int, x Vector) float32 { + if x.Inc < 0 { + panic(negInc) + } + return cblas64.Scnrm2(n, x.Data, x.Inc) +} + +// Asum computes the sum of magnitudes of the real and imaginary parts of +// elements of the vector x: +// \sum_i (|Re x[i]| + |Im x[i]|). +// +// Asum will panic if the vector increment is negative. +func Asum(n int, x Vector) float32 { + if x.Inc < 0 { + panic(negInc) + } + return cblas64.Scasum(n, x.Data, x.Inc) +} + +// Iamax returns the index of an element of x with the largest sum of +// magnitudes of the real and imaginary parts (|Re x[i]|+|Im x[i]|). +// If there are multiple such indices, the earliest is returned. +// +// Iamax returns -1 if n == 0. +// +// Iamax will panic if the vector increment is negative. +func Iamax(n int, x Vector) int { + if x.Inc < 0 { + panic(negInc) + } + return cblas64.Icamax(n, x.Data, x.Inc) +} + +// Swap exchanges the elements of two vectors: +// x[i], y[i] = y[i], x[i] for all i. +// +// Swap itself does not check the sign of the vector increments; they are +// passed to the implementation unchanged. +func Swap(n int, x, y Vector) { + cblas64.Cswap(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Copy copies the elements of x into the elements of y: +// y[i] = x[i] for all i.
+func Copy(n int, x, y Vector) { + cblas64.Ccopy(n, x.Data, x.Inc, y.Data, y.Inc) +} + +// Axpy computes +// y = alpha * x + y, +// where x and y are vectors, and alpha is a scalar. +// +// Axpy itself does not check the sign of the vector increments; they are +// passed to the implementation unchanged. +func Axpy(n int, alpha complex64, x, y Vector) { + cblas64.Caxpy(n, alpha, x.Data, x.Inc, y.Data, y.Inc) +} + +// Scal computes +// x = alpha * x, +// where x is a vector, and alpha is a scalar. +// +// Scal will panic if the vector increment is negative. +func Scal(n int, alpha complex64, x Vector) { + if x.Inc < 0 { + panic(negInc) + } + cblas64.Cscal(n, alpha, x.Data, x.Inc) +} + +// Dscal computes +// x = alpha * x, +// where x is a vector, and alpha is a real scalar. +// +// Dscal will panic if the vector increment is negative. +func Dscal(n int, alpha float32, x Vector) { + if x.Inc < 0 { + panic(negInc) + } + cblas64.Csscal(n, alpha, x.Data, x.Inc) +} + +// Level 2 + +// Gemv computes +// y = alpha * A * x + beta * y, if t == blas.NoTrans, +// y = alpha * A^T * x + beta * y, if t == blas.Trans, +// y = alpha * A^H * x + beta * y, if t == blas.ConjTrans, +// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are +// scalars. +func Gemv(t blas.Transpose, alpha complex64, a General, x Vector, beta complex64, y Vector) { + cblas64.Cgemv(t, a.Rows, a.Cols, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Gbmv computes +// y = alpha * A * x + beta * y, if t == blas.NoTrans, +// y = alpha * A^T * x + beta * y, if t == blas.Trans, +// y = alpha * A^H * x + beta * y, if t == blas.ConjTrans, +// where A is an m×n band matrix, x and y are vectors, and alpha and beta are +// scalars.
+func Gbmv(t blas.Transpose, alpha complex64, a Band, x Vector, beta complex64, y Vector) { + cblas64.Cgbmv(t, a.Rows, a.Cols, a.KL, a.KU, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Trmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans, +// x = A^H * x, if t == blas.ConjTrans, +// where A is an n×n triangular matrix, and x is a vector. +func Trmv(t blas.Transpose, a Triangular, x Vector) { + cblas64.Ctrmv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans, +// x = A^H * x, if t == blas.ConjTrans, +// where A is an n×n triangular band matrix, and x is a vector. +func Tbmv(t blas.Transpose, a TriangularBand, x Vector) { + cblas64.Ctbmv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpmv computes +// x = A * x, if t == blas.NoTrans, +// x = A^T * x, if t == blas.Trans, +// x = A^H * x, if t == blas.ConjTrans, +// where A is an n×n triangular matrix in packed format, and x is a vector. +func Tpmv(t blas.Transpose, a TriangularPacked, x Vector) { + cblas64.Ctpmv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Trsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans, +// A^H * x = b, if t == blas.ConjTrans, +// where A is an n×n triangular matrix, and x and b are vectors. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Trsv(t blas.Transpose, a Triangular, x Vector) { + cblas64.Ctrsv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tbsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans, +// A^H * x = b, if t == blas.ConjTrans, +// where A is an n×n triangular band matrix, and x and b are vectors.
+// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Tbsv(t blas.Transpose, a TriangularBand, x Vector) { + cblas64.Ctbsv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc) +} + +// Tpsv solves +// A * x = b, if t == blas.NoTrans, +// A^T * x = b, if t == blas.Trans, +// A^H * x = b, if t == blas.ConjTrans, +// where A is an n×n triangular matrix in packed format and x is a vector. +// +// At entry to the function, x contains the values of b, and the result is +// stored in-place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func Tpsv(t blas.Transpose, a TriangularPacked, x Vector) { + cblas64.Ctpsv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc) +} + +// Hemv computes +// y = alpha * A * x + beta * y, +// where A is an n×n Hermitian matrix, x and y are vectors, and alpha and +// beta are scalars. +func Hemv(alpha complex64, a Hermitian, x Vector, beta complex64, y Vector) { + cblas64.Chemv(a.Uplo, a.N, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Hbmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n Hermitian band matrix, x and y are vectors, and alpha +// and beta are scalars. +func Hbmv(alpha complex64, a HermitianBand, x Vector, beta complex64, y Vector) { + cblas64.Chbmv(a.Uplo, a.N, a.K, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc) +} + +// Hpmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n Hermitian matrix in packed format, x and y are vectors, +// and alpha and beta are scalars. 
func Hpmv(alpha complex64, a HermitianPacked, x Vector, beta complex64, y Vector) {
	// No stride is passed for a; only a.Uplo, a.N and a.Data are forwarded
	// to cblas64.Chpmv.
	cblas64.Chpmv(a.Uplo, a.N, alpha, a.Data, x.Data, x.Inc, beta, y.Data, y.Inc)
}

// Geru performs a rank-1 update
//  A += alpha * x * y^T,
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
//
// The dimensions and stride passed to cblas64.Cgeru are read from a
// (a.Rows, a.Cols, a.Stride).
func Geru(alpha complex64, x, y Vector, a General) {
	cblas64.Cgeru(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}

// Gerc performs a rank-1 update
//  A += alpha * x * y^H,
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
//
// The dimensions and stride passed to cblas64.Cgerc are read from a
// (a.Rows, a.Cols, a.Stride).
func Gerc(alpha complex64, x, y Vector, a General) {
	cblas64.Cgerc(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}

// Her performs a rank-1 update
//  A += alpha * x * x^H,
// where A is an n×n Hermitian matrix, x is a vector, and alpha is a scalar.
//
// alpha is a float32; a.Uplo, a.N and a.Stride are passed through to
// cblas64.Cher.
func Her(alpha float32, x Vector, a Hermitian) {
	cblas64.Cher(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data, a.Stride)
}

// Hpr performs a rank-1 update
//  A += alpha * x * x^H,
// where A is an n×n Hermitian matrix in packed format, x is a vector, and
// alpha is a scalar.
//
// alpha is a float32; no stride is passed for a.
func Hpr(alpha float32, x Vector, a HermitianPacked) {
	cblas64.Chpr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data)
}

// Her2 performs a rank-2 update
//  A += alpha * x * y^H + conj(alpha) * y * x^H,
// where A is an n×n Hermitian matrix, x and y are vectors, and alpha is a scalar.
//
// a.Uplo, a.N and a.Stride are passed through to cblas64.Cher2.
func Her2(alpha complex64, x, y Vector, a Hermitian) {
	cblas64.Cher2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}

// Hpr2 performs a rank-2 update
//  A += alpha * x * y^H + conj(alpha) * y * x^H,
// where A is an n×n Hermitian matrix in packed format, x and y are vectors,
// and alpha is a scalar.
+func Hpr2(alpha complex64, x, y Vector, a HermitianPacked) { + cblas64.Chpr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data) +} + +// Level 3 + +// Gemm computes +// C = alpha * A * B + beta * C, +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed or conjugated. +func Gemm(tA, tB blas.Transpose, alpha complex64, a, b General, beta complex64, c General) { + var m, n, k int + if tA == blas.NoTrans { + m, k = a.Rows, a.Cols + } else { + m, k = a.Cols, a.Rows + } + if tB == blas.NoTrans { + n = b.Cols + } else { + n = b.Rows + } + cblas64.Cgemm(tA, tB, m, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Symm performs +// C = alpha * A * B + beta * C, if s == blas.Left, +// C = alpha * B * A + beta * C, if s == blas.Right, +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and +// alpha and beta are scalars. +func Symm(s blas.Side, alpha complex64, a Symmetric, b General, beta complex64, c General) { + var m, n int + if s == blas.Left { + m, n = a.N, b.Cols + } else { + m, n = b.Rows, a.N + } + cblas64.Csymm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Syrk performs a symmetric rank-k update +// C = alpha * A * A^T + beta * C, if t == blas.NoTrans, +// C = alpha * A^T * A + beta * C, if t == blas.Trans, +// where C is an n×n symmetric matrix, A is an n×k matrix if t == blas.NoTrans +// and a k×n matrix otherwise, and alpha and beta are scalars. 
+func Syrk(t blas.Transpose, alpha complex64, a General, beta complex64, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas64.Csyrk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride) +} + +// Syr2k performs a symmetric rank-2k update +// C = alpha * A * B^T + alpha * B * A^T + beta * C, if t == blas.NoTrans, +// C = alpha * A^T * B + alpha * B^T * A + beta * C, if t == blas.Trans, +// where C is an n×n symmetric matrix, A and B are n×k matrices if +// t == blas.NoTrans and k×n otherwise, and alpha and beta are scalars. +func Syr2k(t blas.Transpose, alpha complex64, a, b General, beta complex64, c Symmetric) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas64.Csyr2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Trmm performs +// B = alpha * A * B, if tA == blas.NoTrans and s == blas.Left, +// B = alpha * A^T * B, if tA == blas.Trans and s == blas.Left, +// B = alpha * A^H * B, if tA == blas.ConjTrans and s == blas.Left, +// B = alpha * B * A, if tA == blas.NoTrans and s == blas.Right, +// B = alpha * B * A^T, if tA == blas.Trans and s == blas.Right, +// B = alpha * B * A^H, if tA == blas.ConjTrans and s == blas.Right, +// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is +// a scalar. 
+func Trmm(s blas.Side, tA blas.Transpose, alpha complex64, a Triangular, b General) { + cblas64.Ctrmm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} + +// Trsm solves +// A * X = alpha * B, if tA == blas.NoTrans and s == blas.Left, +// A^T * X = alpha * B, if tA == blas.Trans and s == blas.Left, +// A^H * X = alpha * B, if tA == blas.ConjTrans and s == blas.Left, +// X * A = alpha * B, if tA == blas.NoTrans and s == blas.Right, +// X * A^T = alpha * B, if tA == blas.Trans and s == blas.Right, +// X * A^H = alpha * B, if tA == blas.ConjTrans and s == blas.Right, +// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and +// alpha is a scalar. +// +// At entry to the function, b contains the values of B, and the result is +// stored in-place into b. +// +// No check is made that A is invertible. +func Trsm(s blas.Side, tA blas.Transpose, alpha complex64, a Triangular, b General) { + cblas64.Ctrsm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride) +} + +// Hemm performs +// C = alpha * A * B + beta * C, if s == blas.Left, +// C = alpha * B * A + beta * C, if s == blas.Right, +// where A is an n×n or m×m Hermitian matrix, B and C are m×n matrices, and +// alpha and beta are scalars. +func Hemm(s blas.Side, alpha complex64, a Hermitian, b General, beta complex64, c General) { + var m, n int + if s == blas.Left { + m, n = a.N, b.Cols + } else { + m, n = b.Rows, a.N + } + cblas64.Chemm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} + +// Herk performs the Hermitian rank-k update +// C = alpha * A * A^H + beta*C, if t == blas.NoTrans, +// C = alpha * A^H * A + beta*C, if t == blas.ConjTrans, +// where C is an n×n Hermitian matrix, A is an n×k matrix if t == blas.NoTrans +// and a k×n matrix otherwise, and alpha and beta are scalars. 
+func Herk(t blas.Transpose, alpha float32, a General, beta float32, c Hermitian) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas64.Cherk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride) +} + +// Her2k performs the Hermitian rank-2k update +// C = alpha * A * B^H + conj(alpha) * B * A^H + beta * C, if t == blas.NoTrans, +// C = alpha * A^H * B + conj(alpha) * B^H * A + beta * C, if t == blas.ConjTrans, +// where C is an n×n Hermitian matrix, A and B are n×k matrices if t == blas.NoTrans +// and k×n matrices otherwise, and alpha and beta are scalars. +func Her2k(t blas.Transpose, alpha complex64, a, b General, beta float32, c Hermitian) { + var n, k int + if t == blas.NoTrans { + n, k = a.Rows, a.Cols + } else { + n, k = a.Cols, a.Rows + } + cblas64.Cher2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride) +} diff --git a/blas/cgo/bench_test.go b/blas/cgo/bench_test.go new file mode 100644 index 00000000..574bfb11 --- /dev/null +++ b/blas/cgo/bench_test.go @@ -0,0 +1,18 @@ +package cgo + +import ( + "github.com/gonum/blas" + "github.com/gonum/blas/testblas" +) + +const ( + Sm = testblas.SmallMat + Med = testblas.MediumMat + Lg = testblas.LargeMat + Hg = testblas.HugeMat +) + +const ( + T = blas.Trans + NT = blas.NoTrans +) diff --git a/blas/cgo/blas.go b/blas/cgo/blas.go new file mode 100644 index 00000000..552bc051 --- /dev/null +++ b/blas/cgo/blas.go @@ -0,0 +1,4258 @@ +// Do not manually edit this file. It was created by the generate_blas.go from cblas.h. + +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +package cgo + +/* +#cgo CFLAGS: -g -O2 +#include "cblas.h" +*/ +import "C" + +import ( + "unsafe" + + "github.com/gonum/blas" +) + +// Type check assertions: +var ( + _ blas.Float32 = Implementation{} + _ blas.Float64 = Implementation{} + _ blas.Complex64 = Implementation{} + _ blas.Complex128 = Implementation{} +) + +// Type order is used to specify the matrix storage format. We still interact with +// an API that allows client calls to specify order, so this is here to document that fact. +type order int + +const ( + rowMajor order = 101 + iota +) + +func max(a, b int) int { + if a > b { + return a + } + return b +} + +type Implementation struct{} + +// Special cases... + +type srotmParams struct { + flag float32 + h [4]float32 +} + +type drotmParams struct { + flag float64 + h [4]float64 +} + +func (Implementation) Srotg(a float32, b float32) (c float32, s float32, r float32, z float32) { + C.cblas_srotg((*C.float)(&a), (*C.float)(&b), (*C.float)(&c), (*C.float)(&s)) + return c, s, a, b +} +func (Implementation) Srotmg(d1 float32, d2 float32, b1 float32, b2 float32) (p blas.SrotmParams, rd1 float32, rd2 float32, rb1 float32) { + var pi srotmParams + C.cblas_srotmg((*C.float)(&d1), (*C.float)(&d2), (*C.float)(&b1), C.float(b2), (*C.float)(unsafe.Pointer(&pi))) + return blas.SrotmParams{Flag: blas.Flag(pi.flag), H: pi.h}, d1, d2, b1 +} +func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if p.Flag < blas.Identity || p.Flag > blas.Diagonal { + panic("blas: illegal blas.Flag value") + } + if n == 0 { + return + } 
+ pi := srotmParams{ + flag: float32(p.Flag), + h: p.H, + } + C.cblas_srotm(C.int(n), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY), (*C.float)(unsafe.Pointer(&pi))) +} +func (Implementation) Drotg(a float64, b float64) (c float64, s float64, r float64, z float64) { + C.cblas_drotg((*C.double)(&a), (*C.double)(&b), (*C.double)(&c), (*C.double)(&s)) + return c, s, a, b +} +func (Implementation) Drotmg(d1 float64, d2 float64, b1 float64, b2 float64) (p blas.DrotmParams, rd1 float64, rd2 float64, rb1 float64) { + var pi drotmParams + C.cblas_drotmg((*C.double)(&d1), (*C.double)(&d2), (*C.double)(&b1), C.double(b2), (*C.double)(unsafe.Pointer(&pi))) + return blas.DrotmParams{Flag: blas.Flag(pi.flag), H: pi.h}, d1, d2, b1 +} +func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if p.Flag < blas.Identity || p.Flag > blas.Diagonal { + panic("blas: illegal blas.Flag value") + } + if n == 0 { + return + } + pi := drotmParams{ + flag: float64(p.Flag), + h: p.H, + } + C.cblas_drotm(C.int(n), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY), (*C.double)(unsafe.Pointer(&pi))) +} +func (Implementation) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= 
len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + C.cblas_cdotu_sub(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&dotu)) + return dotu +} +func (Implementation) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + C.cblas_cdotc_sub(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&dotc)) + return dotc +} +func (Implementation) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + C.cblas_zdotu_sub(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&dotu)) + return dotu +} +func (Implementation) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x 
index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + C.cblas_zdotc_sub(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&dotc)) + return dotc +} + +// Generated cases ... + +// Sdsdot computes the dot product of the two vectors plus a constant +// alpha + \sum_i x[i]*y[i] +func (Implementation) Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32 { + // declared at cblas.h:24:8 float cblas_sdsdot ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + return float32(C.cblas_sdsdot(C.int(n), C.float(alpha), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY))) +} + +// Dsdot computes the dot product of the two vectors +// \sum_i x[i]*y[i] +func (Implementation) Dsdot(n int, x []float32, incX int, y []float32, incY int) float64 { + // declared at cblas.h:26:8 double cblas_dsdot ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + return float64(C.cblas_dsdot(C.int(n), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY))) +} + +// Sdot computes the dot product of the two vectors +// \sum_i x[i]*y[i] +func (Implementation) Sdot(n int, x []float32, incX int, y []float32, incY int) float32 { + // declared at cblas.h:28:8 float cblas_sdot ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + return float32(C.cblas_sdot(C.int(n), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY))) +} + +// Ddot computes the dot product of the two vectors +// \sum_i x[i]*y[i] +func (Implementation) Ddot(n int, x []float64, incX int, y []float64, incY int) float64 { + // declared at cblas.h:30:8 double cblas_ddot ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + return float64(C.cblas_ddot(C.int(n), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY))) +} + +// Snrm2 computes the Euclidean norm of a vector, +// sqrt(\sum_i x[i] * x[i]). +// This function returns 0 if incX is negative. +func (Implementation) Snrm2(n int, x []float32, incX int) float32 { + // declared at cblas.h:49:8 float cblas_snrm2 ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return 0 + } + return float32(C.cblas_snrm2(C.int(n), (*C.float)(&x[0]), C.int(incX))) +} + +// Sasum computes the sum of the absolute values of the elements of x. +// \sum_i |x[i]| +// Sasum returns 0 if incX is negative. +func (Implementation) Sasum(n int, x []float32, incX int) float32 { + // declared at cblas.h:50:8 float cblas_sasum ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return 0 + } + return float32(C.cblas_sasum(C.int(n), (*C.float)(&x[0]), C.int(incX))) +} + +// Dnrm2 computes the Euclidean norm of a vector, +// sqrt(\sum_i x[i] * x[i]). +// This function returns 0 if incX is negative. +func (Implementation) Dnrm2(n int, x []float64, incX int) float64 { + // declared at cblas.h:52:8 double cblas_dnrm2 ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return 0 + } + return float64(C.cblas_dnrm2(C.int(n), (*C.double)(&x[0]), C.int(incX))) +} + +// Dasum computes the sum of the absolute values of the elements of x. +// \sum_i |x[i]| +// Dasum returns 0 if incX is negative. +func (Implementation) Dasum(n int, x []float64, incX int) float64 { + // declared at cblas.h:53:8 double cblas_dasum ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return 0 + } + return float64(C.cblas_dasum(C.int(n), (*C.double)(&x[0]), C.int(incX))) +} + +func (Implementation) Scnrm2(n int, x []complex64, incX int) float32 { + // declared at cblas.h:55:8 float cblas_scnrm2 ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return 0 + } + return float32(C.cblas_scnrm2(C.int(n), unsafe.Pointer(&x[0]), C.int(incX))) +} + +func (Implementation) Scasum(n int, x []complex64, incX int) float32 { + // declared at cblas.h:56:8 float cblas_scasum ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return 0 + } + return float32(C.cblas_scasum(C.int(n), unsafe.Pointer(&x[0]), C.int(incX))) +} + +func (Implementation) Dznrm2(n int, x []complex128, incX int) float64 { + // declared at cblas.h:58:8 double cblas_dznrm2 ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return 0 + } + return float64(C.cblas_dznrm2(C.int(n), unsafe.Pointer(&x[0]), C.int(incX))) +} + +func (Implementation) Dzasum(n int, x []complex128, incX int) float64 { + // declared at cblas.h:59:8 double cblas_dzasum ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return 0 + } + return float64(C.cblas_dzasum(C.int(n), unsafe.Pointer(&x[0]), C.int(incX))) +} + +// Isamax returns the index of an element of x with the largest absolute value. +// If there are multiple such indices the earliest is returned. +// Isamax returns -1 if n == 0. +func (Implementation) Isamax(n int, x []float32, incX int) int { + // declared at cblas.h:65:13 int cblas_isamax ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if n == 0 || incX < 0 { + return -1 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return -1 + } + return int(C.cblas_isamax(C.int(n), (*C.float)(&x[0]), C.int(incX))) +} + +// Idamax returns the index of an element of x with the largest absolute value. +// If there are multiple such indices the earliest is returned. +// Idamax returns -1 if n == 0. +func (Implementation) Idamax(n int, x []float64, incX int) int { + // declared at cblas.h:66:13 int cblas_idamax ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if n == 0 || incX < 0 { + return -1 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return -1 + } + return int(C.cblas_idamax(C.int(n), (*C.double)(&x[0]), C.int(incX))) +} + +func (Implementation) Icamax(n int, x []complex64, incX int) int { + // declared at cblas.h:67:13 int cblas_icamax ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if n == 0 || incX < 0 { + return -1 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return -1 + } + return int(C.cblas_icamax(C.int(n), unsafe.Pointer(&x[0]), C.int(incX))) +} + +func (Implementation) Izamax(n int, x []complex128, incX int) int { + // declared at cblas.h:68:13 int cblas_izamax ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if n == 0 || incX < 0 { + return -1 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return -1 + } + return int(C.cblas_izamax(C.int(n), unsafe.Pointer(&x[0]), C.int(incX))) +} + +// Sswap exchanges the elements of two vectors. +// x[i], y[i] = y[i], x[i] for all i +func (Implementation) Sswap(n int, x []float32, incX int, y []float32, incY int) { + // declared at cblas.h:79:6 void cblas_sswap ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_sswap(C.int(n), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY)) +} + +// Scopy copies the elements of x into the elements of y. +// y[i] = x[i] for all i +func (Implementation) Scopy(n int, x []float32, incX int, y []float32, incY int) { + // declared at cblas.h:81:6 void cblas_scopy ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_scopy(C.int(n), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY)) +} + +// Saxpy adds alpha times x to y +// y[i] += alpha * x[i] for all i +func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int) { + // declared at cblas.h:83:6 void cblas_saxpy ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_saxpy(C.int(n), C.float(alpha), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY)) +} + +// Dswap exchanges the elements of two vectors. +// x[i], y[i] = y[i], x[i] for all i +func (Implementation) Dswap(n int, x []float64, incX int, y []float64, incY int) { + // declared at cblas.h:90:6 void cblas_dswap ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_dswap(C.int(n), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY)) +} + +// Dcopy copies the elements of x into the elements of y. +// y[i] = x[i] for all i +func (Implementation) Dcopy(n int, x []float64, incX int, y []float64, incY int) { + // declared at cblas.h:92:6 void cblas_dcopy ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_dcopy(C.int(n), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY)) +} + +// Daxpy adds alpha times x to y +// y[i] += alpha * x[i] for all i +func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) { + // declared at cblas.h:94:6 void cblas_daxpy ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_daxpy(C.int(n), C.double(alpha), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY)) +} + +func (Implementation) Cswap(n int, x []complex64, incX int, y []complex64, incY int) { + // declared at cblas.h:101:6 void cblas_cswap ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_cswap(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Ccopy(n int, x []complex64, incX int, y []complex64, incY int) { + // declared at cblas.h:103:6 void cblas_ccopy ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_ccopy(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) { + // declared at cblas.h:105:6 void cblas_caxpy ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_caxpy(C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Zswap(n int, x []complex128, incX int, y []complex128, incY int) { + // declared at cblas.h:112:6 void cblas_zswap ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_zswap(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Zcopy(n int, x []complex128, incX int, y []complex128, incY int) { + // declared at cblas.h:114:6 void cblas_zcopy ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_zcopy(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) { + // declared at cblas.h:116:6 void cblas_zaxpy ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_zaxpy(C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY)) +} + +// Srot applies a plane transformation. +// x[i] = c * x[i] + s * y[i] +// y[i] = c * y[i] - s * x[i] +func (Implementation) Srot(n int, x []float32, incX int, y []float32, incY int, c, s float32) { + // declared at cblas.h:129:6 void cblas_srot ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_srot(C.int(n), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY), C.float(c), C.float(s)) +} + +// Drot applies a plane transformation. +// x[i] = c * x[i] + s * y[i] +// y[i] = c * y[i] - s * x[i] +func (Implementation) Drot(n int, x []float64, incX int, y []float64, incY int, c, s float64) { + // declared at cblas.h:136:6 void cblas_drot ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_drot(C.int(n), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY), C.double(c), C.double(s)) +} + +// Sscal scales x by alpha. +// x[i] *= alpha +// Sscal has no effect if incX < 0. +func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) { + // declared at cblas.h:145:6 void cblas_sscal ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_sscal(C.int(n), C.float(alpha), (*C.float)(&x[0]), C.int(incX)) +} + +// Dscal scales x by alpha. +// x[i] *= alpha +// Dscal has no effect if incX < 0. 
+func (Implementation) Dscal(n int, alpha float64, x []float64, incX int) { + // declared at cblas.h:146:6 void cblas_dscal ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_dscal(C.int(n), C.double(alpha), (*C.double)(&x[0]), C.int(incX)) +} + +func (Implementation) Cscal(n int, alpha complex64, x []complex64, incX int) { + // declared at cblas.h:147:6 void cblas_cscal ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_cscal(C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX)) +} + +func (Implementation) Zscal(n int, alpha complex128, x []complex128, incX int) { + // declared at cblas.h:148:6 void cblas_zscal ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_zscal(C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX)) +} + +func (Implementation) Csscal(n int, alpha float32, x []complex64, incX int) { + // declared at cblas.h:149:6 void cblas_csscal ... + + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incX < 0 { + return + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_csscal(C.int(n), C.float(alpha), unsafe.Pointer(&x[0]), C.int(incX)) +} + +func (Implementation) Zdscal(n int, alpha float64, x []complex128, incX int) { + // declared at cblas.h:150:6 void cblas_zdscal ... 
+ + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_zdscal(C.int(n), C.double(alpha), unsafe.Pointer(&x[0]), C.int(incX)) +} + +// Sgemv computes +// y = alpha * a * x + beta * y if tA = blas.NoTrans +// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + // declared at cblas.h:171:6 void cblas_sgemv ... + + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + var lenX, lenY int + if tA == blas.NoTrans { + lenX, lenY = n, m + } else { + lenX, lenY = m, n + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_sgemv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.int(m), C.int(n), C.float(alpha), (*C.float)(&a[0]), C.int(lda), (*C.float)(&x[0]), C.int(incX), C.float(beta), (*C.float)(&y[0]), C.int(incY)) +} + +// Sgbmv computes +// y = alpha * A * x + beta * y if tA == blas.NoTrans +// y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans +// where a is an m×n band matrix kL subdiagonals and kU super-diagonals, and +// m and n 
refer to the size of the full dense matrix it represents. +// x and y are vectors, and alpha and beta are scalars. +func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + // declared at cblas.h:176:6 void cblas_sgbmv ... + + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if kL < 0 { + panic("blas: kL < 0") + } + if kU < 0 { + panic("blas: kU < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + var lenX, lenY int + if tA == blas.NoTrans { + lenX, lenY = n, m + } else { + lenX, lenY = m, n + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 { + panic("blas: index of a out of range") + } + C.cblas_sgbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.int(m), C.int(n), C.int(kL), C.int(kU), C.float(alpha), (*C.float)(&a[0]), C.int(lda), (*C.float)(&x[0]), C.int(incX), C.float(beta), (*C.float)(&y[0]), C.int(incY)) +} + +// Strmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// A is an n×n Triangular matrix and x is a vector. +func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { + // declared at cblas.h:181:6 void cblas_strmv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_strmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), (*C.float)(&a[0]), C.int(lda), (*C.float)(&x[0]), C.int(incX)) +} + +// Stbmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// where A is an n×n triangular banded matrix with k diagonals, and x is a vector. +func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { + // declared at cblas.h:185:6 void cblas_stbmv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic("blas: index of a out of range") + } + C.cblas_stbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), C.int(k), (*C.float)(&a[0]), C.int(lda), (*C.float)(&x[0]), C.int(incX)) +} + +// Stpmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// where A is an n×n unit triangular matrix in packed format, and x is a vector. +func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap, x []float32, incX int) { + // declared at cblas.h:189:6 void cblas_stpmv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_stpmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), (*C.float)(&ap[0]), (*C.float)(&x[0]), C.int(incX)) +} + +// Strsv solves +// A * x = b if tA == blas.NoTrans +// A^T * x = b if tA == blas.Trans or blas.ConjTrans +// A is an n×n triangular matrix and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { + // declared at cblas.h:192:6 void cblas_strsv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_strsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), (*C.float)(&a[0]), C.int(lda), (*C.float)(&x[0]), C.int(incX)) +} + +// Stbsv solves +// A * x = b +// where A is an n×n triangular banded matrix with k diagonals in packed format, +// and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { + // declared at cblas.h:196:6 void cblas_stbsv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic("blas: index of a out of range") + } + C.cblas_stbsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), C.int(k), (*C.float)(&a[0]), C.int(lda), (*C.float)(&x[0]), C.int(incX)) +} + +// Stpsv solves +// A * x = b if tA == blas.NoTrans +// A^T * x = b if tA == blas.Trans or blas.ConjTrans +// where A is an n×n triangular matrix in packed format and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap, x []float32, incX int) { + // declared at cblas.h:200:6 void cblas_stpsv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_stpsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), (*C.float)(&ap[0]), (*C.float)(&x[0]), C.int(incX)) +} + +// Dgemv computes +// y = alpha * a * x + beta * y if tA = blas.NoTrans +// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { + // declared at cblas.h:204:6 void cblas_dgemv ... 
+ + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + var lenX, lenY int + if tA == blas.NoTrans { + lenX, lenY = n, m + } else { + lenX, lenY = m, n + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_dgemv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.int(m), C.int(n), C.double(alpha), (*C.double)(&a[0]), C.int(lda), (*C.double)(&x[0]), C.int(incX), C.double(beta), (*C.double)(&y[0]), C.int(incY)) +} + +// Dgbmv computes +// y = alpha * A * x + beta * y if tA == blas.NoTrans +// y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans +// where a is an m×n band matrix kL subdiagonals and kU super-diagonals, and +// m and n refer to the size of the full dense matrix it represents. +// x and y are vectors, and alpha and beta are scalars. +func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { + // declared at cblas.h:209:6 void cblas_dgbmv ... 
+ + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if kL < 0 { + panic("blas: kL < 0") + } + if kU < 0 { + panic("blas: kU < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + var lenX, lenY int + if tA == blas.NoTrans { + lenX, lenY = n, m + } else { + lenX, lenY = m, n + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 { + panic("blas: index of a out of range") + } + C.cblas_dgbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.int(m), C.int(n), C.int(kL), C.int(kU), C.double(alpha), (*C.double)(&a[0]), C.int(lda), (*C.double)(&x[0]), C.int(incX), C.double(beta), (*C.double)(&y[0]), C.int(incY)) +} + +// Dtrmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// A is an n×n Triangular matrix and x is a vector. +func (Implementation) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { + // declared at cblas.h:214:6 void cblas_dtrmv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_dtrmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), (*C.double)(&a[0]), C.int(lda), (*C.double)(&x[0]), C.int(incX)) +} + +// Dtbmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// where A is an n×n triangular banded matrix with k diagonals, and x is a vector. +func (Implementation) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { + // declared at cblas.h:218:6 void cblas_dtbmv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic("blas: index of a out of range") + } + C.cblas_dtbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), C.int(k), (*C.double)(&a[0]), C.int(lda), (*C.double)(&x[0]), C.int(incX)) +} + +// Dtpmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// where A is an n×n unit triangular matrix in packed format, and x is a vector. +func (Implementation) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap, x []float64, incX int) { + // declared at cblas.h:222:6 void cblas_dtpmv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_dtpmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), (*C.double)(&ap[0]), (*C.double)(&x[0]), C.int(incX)) +} + +// Dtrsv solves +// A * x = b if tA == blas.NoTrans +// A^T * x = b if tA == blas.Trans or blas.ConjTrans +// A is an n×n triangular matrix and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { + // declared at cblas.h:225:6 void cblas_dtrsv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_dtrsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), (*C.double)(&a[0]), C.int(lda), (*C.double)(&x[0]), C.int(incX)) +} + +// Dtbsv solves +// A * x = b +// where A is an n×n triangular banded matrix with k diagonals in packed format, +// and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { + // declared at cblas.h:229:6 void cblas_dtbsv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic("blas: index of a out of range") + } + C.cblas_dtbsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), C.int(k), (*C.double)(&a[0]), C.int(lda), (*C.double)(&x[0]), C.int(incX)) +} + +// Dtpsv solves +// A * x = b if tA == blas.NoTrans +// A^T * x = b if tA == blas.Trans or blas.ConjTrans +// where A is an n×n triangular matrix in packed format and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap, x []float64, incX int) { + // declared at cblas.h:233:6 void cblas_dtpsv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_dtpsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), (*C.double)(&ap[0]), (*C.double)(&x[0]), C.int(incX)) +} + +func (Implementation) Cgemv(tA blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) { + // declared at cblas.h:237:6 void cblas_cgemv ... 
+ + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + var lenX, lenY int + if tA == blas.NoTrans { + lenX, lenY = n, m + } else { + lenX, lenY = m, n + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_cgemv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Cgbmv(tA blas.Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) { + // declared at cblas.h:242:6 void cblas_cgbmv ... 
+ + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if kL < 0 { + panic("blas: kL < 0") + } + if kU < 0 { + panic("blas: kU < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + var lenX, lenY int + if tA == blas.NoTrans { + lenX, lenY = n, m + } else { + lenX, lenY = m, n + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 { + panic("blas: index of a out of range") + } + C.cblas_cgbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.int(m), C.int(n), C.int(kL), C.int(kU), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Ctrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) { + // declared at cblas.h:247:6 void cblas_ctrmv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_ctrmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX)) +} + +func (Implementation) Ctbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) { + // declared at cblas.h:251:6 void cblas_ctbmv ... + + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic("blas: index of a out of range") + } + C.cblas_ctbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), C.int(k), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX)) +} + +func (Implementation) Ctpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap, x []complex64, incX int) { + // declared at cblas.h:255:6 void cblas_ctpmv ... 
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_ctpmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), unsafe.Pointer(&ap[0]), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Ctrsv checks its arguments, panicking if any is invalid, and then calls
+// cblas_ctrsv with rowMajor as the storage order.
+func (Implementation) Ctrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
+	// declared at cblas.h:258:6 void cblas_ctrsv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a and x may be empty when n == 0, so &a[0] and
+		// &x[0] below must not be evaluated.
+		return
+	}
+	C.cblas_ctrsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Ctbsv checks its arguments, panicking if any is invalid, and then calls
+// cblas_ctbsv with rowMajor as the storage order.
+func (Implementation) Ctbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
+	// declared at cblas.h:262:6 void cblas_ctbsv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if k < 0 {
+		panic("blas: k < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if lda*(n-1)+k+1 > len(a) || lda < k+1 {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a and x may be empty when n == 0, so &a[0] and
+		// &x[0] below must not be evaluated.
+		return
+	}
+	C.cblas_ctbsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), C.int(k), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Ctpsv checks its arguments, panicking if any is invalid, and then calls
+// cblas_ctpsv with rowMajor as the storage order. It returns without calling
+// into C when n is zero.
+func (Implementation) Ctpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap, x []complex64, incX int) {
+	// declared at cblas.h:266:6 void cblas_ctpsv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_ctpsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), unsafe.Pointer(&ap[0]), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Zgemv checks its arguments, panicking if any is invalid, and then calls
+// cblas_zgemv with rowMajor as the storage order.
+func (Implementation) Zgemv(tA blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
+	// declared at cblas.h:270:6 void cblas_zgemv ...
+
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if m < 0 {
+		panic("blas: m < 0")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	var lenX, lenY int
+	if tA == blas.NoTrans {
+		lenX, lenY = n, m
+	} else {
+		lenX, lenY = m, n
+	}
+	if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(m-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if m == 0 || n == 0 {
+		// Quick return: a, x or y may be empty when a dimension is zero,
+		// so their first elements must not be addressed below.
+		return
+	}
+	C.cblas_zgemv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY))
+}
+
+// Zgbmv checks its arguments, panicking if any is invalid, and then calls
+// cblas_zgbmv with rowMajor as the storage order.
+func (Implementation) Zgbmv(tA blas.Transpose, m, n, kL, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
+	// declared at cblas.h:275:6 void cblas_zgbmv ...
+
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if m < 0 {
+		panic("blas: m < 0")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if kL < 0 {
+		panic("blas: kL < 0")
+	}
+	if kU < 0 {
+		panic("blas: kU < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	var lenX, lenY int
+	if tA == blas.NoTrans {
+		lenX, lenY = n, m
+	} else {
+		lenX, lenY = m, n
+	}
+	if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
+		panic("blas: index of a out of range")
+	}
+	if m == 0 || n == 0 {
+		// Quick return: a, x or y may be empty when a dimension is zero,
+		// so their first elements must not be addressed below.
+		return
+	}
+	C.cblas_zgbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.int(m), C.int(n), C.int(kL), C.int(kU), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY))
+}
+
+// Ztrmv checks its arguments, panicking if any is invalid, and then calls
+// cblas_ztrmv with rowMajor as the storage order.
+func (Implementation) Ztrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex128, lda int, x []complex128, incX int) {
+	// declared at cblas.h:280:6 void cblas_ztrmv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a and x may be empty when n == 0, so &a[0] and
+		// &x[0] below must not be evaluated.
+		return
+	}
+	C.cblas_ztrmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Ztbmv checks its arguments, panicking if any is invalid, and then calls
+// cblas_ztbmv with rowMajor as the storage order.
+func (Implementation) Ztbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
+	// declared at cblas.h:284:6 void cblas_ztbmv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if k < 0 {
+		panic("blas: k < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if lda*(n-1)+k+1 > len(a) || lda < k+1 {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a and x may be empty when n == 0, so &a[0] and
+		// &x[0] below must not be evaluated.
+		return
+	}
+	C.cblas_ztbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), C.int(k), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Ztpmv checks its arguments, panicking if any is invalid, and then calls
+// cblas_ztpmv with rowMajor as the storage order.
+func (Implementation) Ztpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap, x []complex128, incX int) {
+	// declared at cblas.h:288:6 void cblas_ztpmv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_ztpmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), unsafe.Pointer(&ap[0]), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Ztrsv checks its arguments, panicking if any is invalid, and then calls
+// cblas_ztrsv with rowMajor as the storage order.
+func (Implementation) Ztrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex128, lda int, x []complex128, incX int) {
+	// declared at cblas.h:291:6 void cblas_ztrsv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a and x may be empty when n == 0, so &a[0] and
+		// &x[0] below must not be evaluated.
+		return
+	}
+	C.cblas_ztrsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Ztbsv checks its arguments, panicking if any is invalid, and then calls
+// cblas_ztbsv with rowMajor as the storage order.
+func (Implementation) Ztbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
+	// declared at cblas.h:295:6 void cblas_ztbsv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if k < 0 {
+		panic("blas: k < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if lda*(n-1)+k+1 > len(a) || lda < k+1 {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a and x may be empty when n == 0, so &a[0] and
+		// &x[0] below must not be evaluated.
+		return
+	}
+	C.cblas_ztbsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), C.int(k), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Ztpsv checks its arguments, panicking if any is invalid, and then calls
+// cblas_ztpsv with rowMajor as the storage order. It returns without calling
+// into C when n is zero.
+func (Implementation) Ztpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap, x []complex128, incX int) {
+	// declared at cblas.h:299:6 void cblas_ztpsv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
+		panic("blas: illegal transpose")
+	}
+	if d != blas.NonUnit && d != blas.Unit {
+		panic("blas: illegal diagonal")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_ztpsv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(n), unsafe.Pointer(&ap[0]), unsafe.Pointer(&x[0]), C.int(incX))
+}
+
+// Ssymv computes
+//  y = alpha * A * x + beta * y,
+// where A is an n×n symmetric matrix, x and y are vectors, and alpha and
+// beta are scalars.
+func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
+	// declared at cblas.h:307:6 void cblas_ssymv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a, x and y may be empty when n == 0, so their
+		// first elements must not be addressed below.
+		return
+	}
+	C.cblas_ssymv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.float(alpha), (*C.float)(&a[0]), C.int(lda), (*C.float)(&x[0]), C.int(incX), C.float(beta), (*C.float)(&y[0]), C.int(incY))
+}
+
+// Ssbmv performs
+//  y = alpha * A * x + beta * y
+// where A is an n×n symmetric banded matrix, x and y are vectors, and alpha
+// and beta are scalars.
+func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
+	// declared at cblas.h:311:6 void cblas_ssbmv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if k < 0 {
+		panic("blas: k < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(n-1)+k+1 > len(a) || lda < k+1 {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a, x and y may be empty when n == 0, so their
+		// first elements must not be addressed below.
+		return
+	}
+	C.cblas_ssbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.int(k), C.float(alpha), (*C.float)(&a[0]), C.int(lda), (*C.float)(&x[0]), C.int(incX), C.float(beta), (*C.float)(&y[0]), C.int(incY))
+}
+
+// Sspmv performs
+//  y = alpha * A * x + beta * y,
+// where A is an n×n symmetric matrix in packed format, x and y are vectors
+// and alpha and beta are scalars.
+func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, ap, x []float32, incX int, beta float32, y []float32, incY int) {
+	// declared at cblas.h:315:6 void cblas_sspmv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_sspmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.float(alpha), (*C.float)(&ap[0]), (*C.float)(&x[0]), C.int(incX), C.float(beta), (*C.float)(&y[0]), C.int(incY))
+}
+
+// Sger performs the rank-one operation
+//  A += alpha * x * y^T
+// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
+func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) {
+	// declared at cblas.h:319:6 void cblas_sger ...
+
+	if m < 0 {
+		panic("blas: m < 0")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(m-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if m == 0 || n == 0 {
+		// Quick return: a, x or y may be empty when a dimension is zero,
+		// so their first elements must not be addressed below.
+		return
+	}
+	C.cblas_sger(C.enum_CBLAS_ORDER(rowMajor), C.int(m), C.int(n), C.float(alpha), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY), (*C.float)(&a[0]), C.int(lda))
+}
+
+// Ssyr performs the rank-one update
+//  a += alpha * x * x^T
+// where a is an n×n symmetric matrix, and x is a vector.
+func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) {
+	// declared at cblas.h:322:6 void cblas_ssyr ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a and x may be empty when n == 0, so &a[0] and
+		// &x[0] below must not be evaluated.
+		return
+	}
+	C.cblas_ssyr(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.float(alpha), (*C.float)(&x[0]), C.int(incX), (*C.float)(&a[0]), C.int(lda))
+}
+
+// Sspr computes the rank-one operation
+//  a += alpha * x * x^T
+// where a is an n×n symmetric matrix in packed format, x is a vector, and
+// alpha is a scalar.
+func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, ap []float32) {
+	// declared at cblas.h:325:6 void cblas_sspr ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_sspr(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.float(alpha), (*C.float)(&x[0]), C.int(incX), (*C.float)(&ap[0]))
+}
+
+// Ssyr2 performs the symmetric rank-two update
+//  A += alpha * x * y^T + alpha * y * x^T
+// where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.
+func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) {
+	// declared at cblas.h:328:6 void cblas_ssyr2 ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a, x and y may be empty when n == 0, so their
+		// first elements must not be addressed below.
+		return
+	}
+	C.cblas_ssyr2(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.float(alpha), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY), (*C.float)(&a[0]), C.int(lda))
+}
+
+// Sspr2 performs the symmetric rank-2 update
+//  A += alpha * x * y^T + alpha * y * x^T,
+// where A is an n×n symmetric matrix in packed format, x and y are vectors,
+// and alpha is a scalar.
+func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) {
+	// declared at cblas.h:332:6 void cblas_sspr2 ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_sspr2(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.float(alpha), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY), (*C.float)(&ap[0]))
+}
+
+// Dsymv computes
+//  y = alpha * A * x + beta * y,
+// where A is an n×n symmetric matrix, x and y are vectors, and alpha and
+// beta are scalars.
+func (Implementation) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
+	// declared at cblas.h:336:6 void cblas_dsymv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a, x and y may be empty when n == 0, so their
+		// first elements must not be addressed below.
+		return
+	}
+	C.cblas_dsymv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.double(alpha), (*C.double)(&a[0]), C.int(lda), (*C.double)(&x[0]), C.int(incX), C.double(beta), (*C.double)(&y[0]), C.int(incY))
+}
+
+// Dsbmv performs
+//  y = alpha * A * x + beta * y
+// where A is an n×n symmetric banded matrix, x and y are vectors, and alpha
+// and beta are scalars.
+func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
+	// declared at cblas.h:340:6 void cblas_dsbmv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if k < 0 {
+		panic("blas: k < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(n-1)+k+1 > len(a) || lda < k+1 {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a, x and y may be empty when n == 0, so their
+		// first elements must not be addressed below.
+		return
+	}
+	C.cblas_dsbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.int(k), C.double(alpha), (*C.double)(&a[0]), C.int(lda), (*C.double)(&x[0]), C.int(incX), C.double(beta), (*C.double)(&y[0]), C.int(incY))
+}
+
+// Dspmv performs
+//  y = alpha * A * x + beta * y,
+// where A is an n×n symmetric matrix in packed format, x and y are vectors
+// and alpha and beta are scalars.
+func (Implementation) Dspmv(ul blas.Uplo, n int, alpha float64, ap, x []float64, incX int, beta float64, y []float64, incY int) {
+	// declared at cblas.h:344:6 void cblas_dspmv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_dspmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.double(alpha), (*C.double)(&ap[0]), (*C.double)(&x[0]), C.int(incX), C.double(beta), (*C.double)(&y[0]), C.int(incY))
+}
+
+// Dger performs the rank-one operation
+//  A += alpha * x * y^T
+// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
+func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) {
+	// declared at cblas.h:348:6 void cblas_dger ...
+
+	if m < 0 {
+		panic("blas: m < 0")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(m-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if m == 0 || n == 0 {
+		// Quick return: a, x or y may be empty when a dimension is zero,
+		// so their first elements must not be addressed below.
+		return
+	}
+	C.cblas_dger(C.enum_CBLAS_ORDER(rowMajor), C.int(m), C.int(n), C.double(alpha), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY), (*C.double)(&a[0]), C.int(lda))
+}
+
+// Dsyr performs the rank-one update
+//  a += alpha * x * x^T
+// where a is an n×n symmetric matrix, and x is a vector.
+func (Implementation) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) {
+	// declared at cblas.h:351:6 void cblas_dsyr ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a and x may be empty when n == 0, so &a[0] and
+		// &x[0] below must not be evaluated.
+		return
+	}
+	C.cblas_dsyr(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.double(alpha), (*C.double)(&x[0]), C.int(incX), (*C.double)(&a[0]), C.int(lda))
+}
+
+// Dspr computes the rank-one operation
+//  a += alpha * x * x^T
+// where a is an n×n symmetric matrix in packed format, x is a vector, and
+// alpha is a scalar.
+func (Implementation) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, ap []float64) {
+	// declared at cblas.h:354:6 void cblas_dspr ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_dspr(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.double(alpha), (*C.double)(&x[0]), C.int(incX), (*C.double)(&ap[0]))
+}
+
+// Dsyr2 performs the symmetric rank-two update
+//  A += alpha * x * y^T + alpha * y * x^T
+// where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.
+func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) {
+	// declared at cblas.h:357:6 void cblas_dsyr2 ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a, x and y may be empty when n == 0, so their
+		// first elements must not be addressed below.
+		return
+	}
+	C.cblas_dsyr2(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.double(alpha), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY), (*C.double)(&a[0]), C.int(lda))
+}
+
+// Dspr2 performs the symmetric rank-2 update
+//  A += alpha * x * y^T + alpha * y * x^T,
+// where A is an n×n symmetric matrix in packed format, x and y are vectors,
+// and alpha is a scalar.
+func (Implementation) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, ap []float64) {
+	// declared at cblas.h:361:6 void cblas_dspr2 ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_dspr2(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.double(alpha), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY), (*C.double)(&ap[0]))
+}
+
+// Chemv checks its arguments, panicking if any is invalid, and then calls
+// cblas_chemv with rowMajor as the storage order.
+func (Implementation) Chemv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
+	// declared at cblas.h:369:6 void cblas_chemv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(n-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a, x and y may be empty when n == 0, so their
+		// first elements must not be addressed below.
+		return
+	}
+	C.cblas_chemv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY))
+}
+
+// Chbmv checks its arguments, panicking if any is invalid, and then calls
+// cblas_chbmv with rowMajor as the storage order.
+func (Implementation) Chbmv(ul blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
+	// declared at cblas.h:373:6 void cblas_chbmv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if k < 0 {
+		panic("blas: k < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(n-1)+k+1 > len(a) || lda < k+1 {
+		panic("blas: index of a out of range")
+	}
+	if n == 0 {
+		// Quick return: a, x and y may be empty when n == 0, so their
+		// first elements must not be addressed below.
+		return
+	}
+	C.cblas_chbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY))
+}
+
+// Chpmv checks its arguments, panicking if any is invalid, and then calls
+// cblas_chpmv with rowMajor as the storage order.
+func (Implementation) Chpmv(ul blas.Uplo, n int, alpha complex64, ap, x []complex64, incX int, beta complex64, y []complex64, incY int) {
+	// declared at cblas.h:377:6 void cblas_chpmv ...
+
+	if ul != blas.Upper && ul != blas.Lower {
+		panic("blas: illegal triangle")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if n*(n+1)/2 > len(ap) {
+		panic("blas: index of ap out of range")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if n == 0 {
+		return
+	}
+	C.cblas_chpmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&ap[0]), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY))
+}
+
+// Cgeru checks its arguments, panicking if any is invalid, and then calls
+// cblas_cgeru with rowMajor as the storage order.
+func (Implementation) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
+	// declared at cblas.h:381:6 void cblas_cgeru ...
+
+	if m < 0 {
+		panic("blas: m < 0")
+	}
+	if n < 0 {
+		panic("blas: n < 0")
+	}
+	if incX == 0 {
+		panic("blas: zero x index increment")
+	}
+	if incY == 0 {
+		panic("blas: zero y index increment")
+	}
+	if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) {
+		panic("blas: x index out of range")
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic("blas: y index out of range")
+	}
+	if lda*(m-1)+n > len(a) || lda < max(1, n) {
+		panic("blas: index of a out of range")
+	}
+	if m == 0 || n == 0 {
+		// Quick return: a, x or y may be empty when a dimension is zero,
+		// so their first elements must not be addressed below.
+		return
+	}
+	C.cblas_cgeru(C.enum_CBLAS_ORDER(rowMajor), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&a[0]), C.int(lda))
+}
+
+// Cgerc checks its arguments, panicking if any is invalid, and then calls
+// cblas_cgerc with rowMajor as the storage order.
+func (Implementation) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
+	// declared at cblas.h:384:6 void cblas_cgerc ...
+ + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_cgerc(C.enum_CBLAS_ORDER(rowMajor), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&a[0]), C.int(lda)) +} + +func (Implementation) Cher(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int) { + // declared at cblas.h:387:6 void cblas_cher ... + + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_cher(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.float(alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&a[0]), C.int(lda)) +} + +func (Implementation) Chpr(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, ap []complex64) { + // declared at cblas.h:390:6 void cblas_chpr ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_chpr(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.float(alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&ap[0])) +} + +func (Implementation) Cher2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) { + // declared at cblas.h:393:6 void cblas_cher2 ... + + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_cher2(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&a[0]), C.int(lda)) +} + +func (Implementation) Chpr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64) { + // declared at cblas.h:396:6 void cblas_chpr2 ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_chpr2(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&ap[0])) +} + +func (Implementation) Zhemv(ul blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) { + // declared at cblas.h:400:6 void cblas_zhemv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_zhemv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Zhbmv(ul blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) { + // declared at cblas.h:404:6 void cblas_zhbmv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic("blas: index of a out of range") + } + C.cblas_zhbmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Zhpmv(ul blas.Uplo, n int, alpha complex128, ap, x []complex128, incX int, beta complex128, y []complex128, incY int) { + // declared at cblas.h:408:6 void cblas_zhpmv ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_zhpmv(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&ap[0]), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&beta), unsafe.Pointer(&y[0]), C.int(incY)) +} + +func (Implementation) Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) { + // declared at cblas.h:412:6 void cblas_zgeru ... + + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_zgeru(C.enum_CBLAS_ORDER(rowMajor), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&a[0]), C.int(lda)) +} + +func (Implementation) Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) { + // declared at cblas.h:415:6 void cblas_zgerc ... 
+ + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_zgerc(C.enum_CBLAS_ORDER(rowMajor), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&a[0]), C.int(lda)) +} + +func (Implementation) Zher(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int) { + // declared at cblas.h:418:6 void cblas_zher ... + + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_zher(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.double(alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&a[0]), C.int(lda)) +} + +func (Implementation) Zhpr(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, ap []complex128) { + // declared at cblas.h:421:6 void cblas_zhpr ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if n == 0 { + return + } + C.cblas_zhpr(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), C.double(alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&ap[0])) +} + +func (Implementation) Zher2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) { + // declared at cblas.h:424:6 void cblas_zher2 ... + + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } + C.cblas_zher2(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&a[0]), C.int(lda)) +} + +func (Implementation) Zhpr2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, ap []complex128) { + // declared at cblas.h:427:6 void cblas_zhpr2 ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return + } + C.cblas_zhpr2(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&ap[0])) +} + +// Sgemm computes +// C = beta * C + alpha * A * B, +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed. +func (Implementation) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { + // declared at cblas.h:440:6 void cblas_sgemm ... 
+ + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var rowA, colA, rowB, colB int + if tA == blas.NoTrans { + rowA, colA = m, k + } else { + rowA, colA = k, m + } + if tB == blas.NoTrans { + rowB, colB = k, n + } else { + rowB, colB = n, k + } + if lda*(rowA-1)+colA > len(a) || lda < max(1, colA) { + panic("blas: index of a out of range") + } + if ldb*(rowB-1)+colB > len(b) || ldb < max(1, colB) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_sgemm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_TRANSPOSE(tB), C.int(m), C.int(n), C.int(k), C.float(alpha), (*C.float)(&a[0]), C.int(lda), (*C.float)(&b[0]), C.int(ldb), C.float(beta), (*C.float)(&c[0]), C.int(ldc)) +} + +// Ssymm performs one of +// C = alpha * A * B + beta * C, if side == blas.Left, +// C = alpha * B * A + beta * C, if side == blas.Right, +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha +// is a scalar. +func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { + // declared at cblas.h:445:6 void cblas_ssymm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_ssymm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.int(m), C.int(n), C.float(alpha), (*C.float)(&a[0]), C.int(lda), (*C.float)(&b[0]), C.int(ldb), C.float(beta), (*C.float)(&c[0]), C.int(ldc)) +} + +// Ssyrk performs the symmetric rank-k operation +// C = alpha * A * A^T + beta*C +// C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and +// a k×n matrix otherwise. alpha and beta are scalars. +func (Implementation) Ssyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) { + // declared at cblas.h:450:6 void cblas_ssyrk ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.Trans && t != blas.ConjTrans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_ssyrk(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), C.float(alpha), (*C.float)(&a[0]), C.int(lda), C.float(beta), (*C.float)(&c[0]), C.int(ldc)) +} + +// Ssyr2k performs the symmetric rank 2k operation +// C = alpha * A * B^T + alpha * B * A^T + beta * C +// where C is an n×n symmetric matrix. A and B are n×k matrices if +// tA == NoTrans and k×n otherwise. alpha and beta are scalars. +func (Implementation) Ssyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { + // declared at cblas.h:454:6 void cblas_ssyr2k ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.Trans && t != blas.ConjTrans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldb*(row-1)+col > len(b) || ldb < max(1, col) { + panic("blas: index of b out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_ssyr2k(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), C.float(alpha), (*C.float)(&a[0]), C.int(lda), (*C.float)(&b[0]), C.int(ldb), C.float(beta), (*C.float)(&c[0]), C.int(ldc)) +} + +// Strmm performs +// B = alpha * A * B, if tA == blas.NoTrans and side == blas.Left, +// B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// B = alpha * B * A, if tA == blas.NoTrans and side == blas.Right, +// B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an n×n or m×m triangular matrix, and B is an m×n matrix. +func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) { + // declared at cblas.h:459:6 void cblas_strmm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + C.cblas_strmm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(m), C.int(n), C.float(alpha), (*C.float)(&a[0]), C.int(lda), (*C.float)(&b[0]), C.int(ldb)) +} + +// Strsm solves +// A * X = alpha * B, if tA == blas.NoTrans side == blas.Left, +// A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// X * A = alpha * B, if tA == blas.NoTrans side == blas.Right, +// X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a +// scalar. +// +// At entry to the function, X contains the values of B, and the result is +// stored in place into X. +// +// No check is made that A is invertible. +func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) { + // declared at cblas.h:464:6 void cblas_strsm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + C.cblas_strsm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(m), C.int(n), C.float(alpha), (*C.float)(&a[0]), C.int(lda), (*C.float)(&b[0]), C.int(ldb)) +} + +// Dgemm computes +// C = beta * C + alpha * A * B, +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed. +func (Implementation) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + // declared at cblas.h:470:6 void cblas_dgemm ... 
+ + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var rowA, colA, rowB, colB int + if tA == blas.NoTrans { + rowA, colA = m, k + } else { + rowA, colA = k, m + } + if tB == blas.NoTrans { + rowB, colB = k, n + } else { + rowB, colB = n, k + } + if lda*(rowA-1)+colA > len(a) || lda < max(1, colA) { + panic("blas: index of a out of range") + } + if ldb*(rowB-1)+colB > len(b) || ldb < max(1, colB) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_dgemm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_TRANSPOSE(tB), C.int(m), C.int(n), C.int(k), C.double(alpha), (*C.double)(&a[0]), C.int(lda), (*C.double)(&b[0]), C.int(ldb), C.double(beta), (*C.double)(&c[0]), C.int(ldc)) +} + +// Dsymm performs one of +// C = alpha * A * B + beta * C, if side == blas.Left, +// C = alpha * B * A + beta * C, if side == blas.Right, +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha +// is a scalar. +func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + // declared at cblas.h:475:6 void cblas_dsymm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_dsymm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.int(m), C.int(n), C.double(alpha), (*C.double)(&a[0]), C.int(lda), (*C.double)(&b[0]), C.int(ldb), C.double(beta), (*C.double)(&c[0]), C.int(ldc)) +} + +// Dsyrk performs the symmetric rank-k operation +// C = alpha * A * A^T + beta*C +// C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and +// a k×n matrix otherwise. alpha and beta are scalars. +func (Implementation) Dsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) { + // declared at cblas.h:480:6 void cblas_dsyrk ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.Trans && t != blas.ConjTrans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_dsyrk(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), C.double(alpha), (*C.double)(&a[0]), C.int(lda), C.double(beta), (*C.double)(&c[0]), C.int(ldc)) +} + +// Dsyr2k performs the symmetric rank 2k operation +// C = alpha * A * B^T + alpha * B * A^T + beta * C +// where C is an n×n symmetric matrix. A and B are n×k matrices if +// tA == NoTrans and k×n otherwise. alpha and beta are scalars. +func (Implementation) Dsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + // declared at cblas.h:484:6 void cblas_dsyr2k ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.Trans && t != blas.ConjTrans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldb*(row-1)+col > len(b) || ldb < max(1, col) { + panic("blas: index of b out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_dsyr2k(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), C.double(alpha), (*C.double)(&a[0]), C.int(lda), (*C.double)(&b[0]), C.int(ldb), C.double(beta), (*C.double)(&c[0]), C.int(ldc)) +} + +// Dtrmm performs +// B = alpha * A * B, if tA == blas.NoTrans and side == blas.Left, +// B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// B = alpha * B * A, if tA == blas.NoTrans and side == blas.Right, +// B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an n×n or m×m triangular matrix, and B is an m×n matrix. +func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { + // declared at cblas.h:489:6 void cblas_dtrmm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + C.cblas_dtrmm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(m), C.int(n), C.double(alpha), (*C.double)(&a[0]), C.int(lda), (*C.double)(&b[0]), C.int(ldb)) +} + +// Dtrsm solves +// A * X = alpha * B, if tA == blas.NoTrans side == blas.Left, +// A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// X * A = alpha * B, if tA == blas.NoTrans side == blas.Right, +// X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a +// scalar. +// +// At entry to the function, X contains the values of B, and the result is +// stored in place into X. +// +// No check is made that A is invertible. +func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { + // declared at cblas.h:494:6 void cblas_dtrsm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + C.cblas_dtrsm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(m), C.int(n), C.double(alpha), (*C.double)(&a[0]), C.int(lda), (*C.double)(&b[0]), C.int(ldb)) +} + +func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { + // declared at cblas.h:500:6 void cblas_cgemm ... 
+ + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var rowA, colA, rowB, colB int + if tA == blas.NoTrans { + rowA, colA = m, k + } else { + rowA, colA = k, m + } + if tB == blas.NoTrans { + rowB, colB = k, n + } else { + rowB, colB = n, k + } + if lda*(rowA-1)+colA > len(a) || lda < max(1, colA) { + panic("blas: index of a out of range") + } + if ldb*(rowB-1)+colB > len(b) || ldb < max(1, colB) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_cgemm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_TRANSPOSE(tB), C.int(m), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Csymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { + // declared at cblas.h:505:6 void cblas_csymm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_csymm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Csyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) { + // declared at cblas.h:510:6 void cblas_csyrk ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.Trans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_csyrk(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Csyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { + // declared at cblas.h:514:6 void cblas_csyr2k ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.Trans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldb*(row-1)+col > len(b) || ldb < max(1, col) { + panic("blas: index of b out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_csyr2k(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Ctrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) { + // declared at cblas.h:519:6 void cblas_ctrmm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + C.cblas_ctrmm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb)) +} + +func (Implementation) Ctrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) { + // declared at cblas.h:524:6 void cblas_ctrsm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + C.cblas_ctrsm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb)) +} + +func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { + // declared at cblas.h:530:6 void cblas_zgemm ... 
+ + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans { + panic("blas: illegal transpose") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var rowA, colA, rowB, colB int + if tA == blas.NoTrans { + rowA, colA = m, k + } else { + rowA, colA = k, m + } + if tB == blas.NoTrans { + rowB, colB = k, n + } else { + rowB, colB = n, k + } + if lda*(rowA-1)+colA > len(a) || lda < max(1, colA) { + panic("blas: index of a out of range") + } + if ldb*(rowB-1)+colB > len(b) || ldb < max(1, colB) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_zgemm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_TRANSPOSE(tB), C.int(m), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { + // declared at cblas.h:535:6 void cblas_zsymm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_zsymm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) { + // declared at cblas.h:540:6 void cblas_zsyrk ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.Trans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_zsyrk(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { + // declared at cblas.h:544:6 void cblas_zsyr2k ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.Trans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldb*(row-1)+col > len(b) || ldb < max(1, col) { + panic("blas: index of b out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_zsyr2k(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) { + // declared at cblas.h:549:6 void cblas_ztrmm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + C.cblas_ztrmm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb)) +} + +func (Implementation) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) { + // declared at cblas.h:554:6 void cblas_ztrsm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic("blas: illegal transpose") + } + if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + C.cblas_ztrsm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(tA), C.enum_CBLAS_DIAG(d), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb)) +} + +func (Implementation) Chemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { + // declared at cblas.h:564:6 void cblas_chemm ... 
+ + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_chemm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Cherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) { + // declared at cblas.h:569:6 void cblas_cherk ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.ConjTrans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_cherk(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), C.float(alpha), unsafe.Pointer(&a[0]), C.int(lda), C.float(beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Cher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) { + // declared at cblas.h:573:6 void cblas_cher2k ... + + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.ConjTrans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldb*(row-1)+col > len(b) || ldb < max(1, col) { + panic("blas: index of b out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_cher2k(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), C.float(beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b 
[]complex128, ldb int, beta complex128, c []complex128, ldc int) { + // declared at cblas.h:578:6 void cblas_zhemm ... + + if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if m < 0 { + panic("blas: m < 0") + } + if n < 0 { + panic("blas: n < 0") + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_zhemm(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_SIDE(s), C.enum_CBLAS_UPLO(ul), C.int(m), C.int(n), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), unsafe.Pointer(&beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Zherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) { + // declared at cblas.h:583:6 void cblas_zherk ... 
+ + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.ConjTrans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_zherk(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), C.double(alpha), unsafe.Pointer(&a[0]), C.int(lda), C.double(beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} + +func (Implementation) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) { + // declared at cblas.h:587:6 void cblas_zher2k ... + + if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } + if t != blas.NoTrans && t != blas.ConjTrans { + panic("blas: illegal transpose") + } + if n < 0 { + panic("blas: n < 0") + } + if k < 0 { + panic("blas: k < 0") + } + var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic("blas: index of a out of range") + } + if ldb*(row-1)+col > len(b) || ldb < max(1, col) { + panic("blas: index of b out of range") + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } + C.cblas_zher2k(C.enum_CBLAS_ORDER(rowMajor), C.enum_CBLAS_UPLO(ul), C.enum_CBLAS_TRANSPOSE(t), C.int(n), C.int(k), unsafe.Pointer(&alpha), unsafe.Pointer(&a[0]), C.int(lda), unsafe.Pointer(&b[0]), C.int(ldb), C.double(beta), unsafe.Pointer(&c[0]), C.int(ldc)) +} diff --git a/blas/cgo/cblas.h b/blas/cgo/cblas.h new file mode 100644 index 00000000..b8ac9a5f --- 
/dev/null +++ b/blas/cgo/cblas.h @@ -0,0 +1,596 @@ +#ifndef CBLAS_H + +#ifndef CBLAS_ENUM_DEFINED_H + #define CBLAS_ENUM_DEFINED_H + enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102 }; + enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, + AtlasConj=114}; + enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; + enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; + enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; +#endif + +#ifndef CBLAS_ENUM_ONLY +#define CBLAS_H +#define CBLAS_INDEX int + +int cblas_errprn(int ierr, int info, char *form, ...); + +/* + * =========================================================================== + * Prototypes for level 1 BLAS functions (complex are recast as routines) + * =========================================================================== + */ +float cblas_sdsdot(const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY); +double cblas_dsdot(const int N, const float *X, const int incX, const float *Y, + const int incY); +float cblas_sdot(const int N, const float *X, const int incX, + const float *Y, const int incY); +double cblas_ddot(const int N, const double *X, const int incX, + const double *Y, const int incY); +/* + * Functions having prefixes Z and C only + */ +void cblas_cdotu_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotu); +void cblas_cdotc_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotc); + +void cblas_zdotu_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotu); +void cblas_zdotc_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotc); + + +/* + * Functions having prefixes S D SC DZ + */ +float cblas_snrm2(const int N, const float *X, const int incX); +float cblas_sasum(const int N, const float *X, const int incX); + +double cblas_dnrm2(const int N, const double *X, const int incX); 
+double cblas_dasum(const int N, const double *X, const int incX); + +float cblas_scnrm2(const int N, const void *X, const int incX); +float cblas_scasum(const int N, const void *X, const int incX); + +double cblas_dznrm2(const int N, const void *X, const int incX); +double cblas_dzasum(const int N, const void *X, const int incX); + + +/* + * Functions having standard 4 prefixes (S D C Z) + */ +CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX); +CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX); +CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); +CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); + +/* + * =========================================================================== + * Prototypes for level 1 BLAS routines + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (s, d, c, z) + */ +void cblas_sswap(const int N, float *X, const int incX, + float *Y, const int incY); +void cblas_scopy(const int N, const float *X, const int incX, + float *Y, const int incY); +void cblas_saxpy(const int N, const float alpha, const float *X, + const int incX, float *Y, const int incY); +void catlas_saxpby(const int N, const float alpha, const float *X, + const int incX, const float beta, float *Y, const int incY); +void catlas_sset + (const int N, const float alpha, float *X, const int incX); + +void cblas_dswap(const int N, double *X, const int incX, + double *Y, const int incY); +void cblas_dcopy(const int N, const double *X, const int incX, + double *Y, const int incY); +void cblas_daxpy(const int N, const double alpha, const double *X, + const int incX, double *Y, const int incY); +void catlas_daxpby(const int N, const double alpha, const double *X, + const int incX, const double beta, double *Y, const int incY); +void catlas_dset + (const int N, const double alpha, double *X, const int incX); + +void cblas_cswap(const 
int N, void *X, const int incX, + void *Y, const int incY); +void cblas_ccopy(const int N, const void *X, const int incX, + void *Y, const int incY); +void cblas_caxpy(const int N, const void *alpha, const void *X, + const int incX, void *Y, const int incY); +void catlas_caxpby(const int N, const void *alpha, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void catlas_cset + (const int N, const void *alpha, void *X, const int incX); + +void cblas_zswap(const int N, void *X, const int incX, + void *Y, const int incY); +void cblas_zcopy(const int N, const void *X, const int incX, + void *Y, const int incY); +void cblas_zaxpy(const int N, const void *alpha, const void *X, + const int incX, void *Y, const int incY); +void catlas_zaxpby(const int N, const void *alpha, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void catlas_zset + (const int N, const void *alpha, void *X, const int incX); + + +/* + * Routines with S and D prefix only + */ +void cblas_srotg(float *a, float *b, float *c, float *s); +void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); +void cblas_srot(const int N, float *X, const int incX, + float *Y, const int incY, const float c, const float s); +void cblas_srotm(const int N, float *X, const int incX, + float *Y, const int incY, const float *P); + +void cblas_drotg(double *a, double *b, double *c, double *s); +void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); +void cblas_drot(const int N, double *X, const int incX, + double *Y, const int incY, const double c, const double s); +void cblas_drotm(const int N, double *X, const int incX, + double *Y, const int incY, const double *P); + + +/* + * Routines with S D C Z CS and ZD prefixes + */ +void cblas_sscal(const int N, const float alpha, float *X, const int incX); +void cblas_dscal(const int N, const double alpha, double *X, const int incX); +void cblas_cscal(const int N, const void 
*alpha, void *X, const int incX); +void cblas_zscal(const int N, const void *alpha, void *X, const int incX); +void cblas_csscal(const int N, const float alpha, void *X, const int incX); +void cblas_zdscal(const int N, const double alpha, void *X, const int incX); + +/* + * Extra reference routines provided by ATLAS, but not mandated by the standard + */ +void cblas_crotg(void *a, void *b, void *c, void *s); +void cblas_zrotg(void *a, void *b, void *c, void *s); +void cblas_csrot(const int N, void *X, const int incX, void *Y, const int incY, + const float c, const float s); +void cblas_zdrot(const int N, void *X, const int incX, void *Y, const int incY, + const double c, const double s); + +/* + * =========================================================================== + * Prototypes for level 2 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ +void cblas_sgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *X, const int incX, const float beta, + float *Y, const int incY); +void cblas_sgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const float alpha, + const float *A, const int lda, const float *X, + const int incX, const float beta, float *Y, const int incY); +void cblas_strmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, + float *X, const int incX); +void cblas_stbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +void cblas_stpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, 
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); +void cblas_strsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, float *X, + const int incX); +void cblas_stbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +void cblas_stpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); + +void cblas_dgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *X, const int incX, const double beta, + double *Y, const int incY); +void cblas_dgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const double alpha, + const double *A, const int lda, const double *X, + const int incX, const double beta, double *Y, const int incY); +void cblas_dtrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, + double *X, const int incX); +void cblas_dtbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +void cblas_dtpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); +void cblas_dtrsv(const enum CBLAS_ORDER Order, 
const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, double *X, + const int incX); +void cblas_dtbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +void cblas_dtpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); + +void cblas_cgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +void cblas_cgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void cblas_ctrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +void cblas_ctbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ctpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +void cblas_ctrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +void cblas_ctbsv(const enum 
CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ctpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + +void cblas_zgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +void cblas_zgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void cblas_ztrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +void cblas_ztbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ztpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +void cblas_ztrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +void cblas_ztbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void 
cblas_ztpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + + +/* + * Routines with S and D prefixes only + */ +void cblas_ssymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_ssbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_sspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *Ap, + const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N, + const float alpha, const float *X, const int incX, + const float *Y, const int incY, float *A, const int lda); +void cblas_ssyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, float *A, const int lda); +void cblas_sspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, float *Ap); +void cblas_ssyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *A, + const int lda); +void cblas_sspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *Ap); + +void cblas_dsymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *A, + const int lda, const double *X, const int incX, + const 
double beta, double *Y, const int incY); +void cblas_dsbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const double alpha, const double *A, + const int lda, const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *Ap, + const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, + const double alpha, const double *X, const int incX, + const double *Y, const int incY, double *A, const int lda); +void cblas_dsyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *A, const int lda); +void cblas_dspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *Ap); +void cblas_dsyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, const int incY, double *A, + const int lda); +void cblas_dspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, const int incY, double *Ap); + + +/* + * Routines with C and Z prefixes only + */ +void cblas_chemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_chbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_chpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + 
const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_cgeru(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_cgerc(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_cher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const void *X, const int incX, + void *A, const int lda); +void cblas_chpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const void *X, + const int incX, void *Ap); +void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_chpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *Ap); + +void cblas_zhemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, 
const int incY, void *A, const int lda); +void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const void *X, const int incX, + void *A, const int lda); +void cblas_zhpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const void *X, + const int incX, void *Ap); +void cblas_zher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zhpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *Ap); + +/* + * =========================================================================== + * Prototypes for level 3 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ +void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const float alpha, const float *A, + const int lda, const float *B, const int ldb, + const float beta, float *C, const int ldc); +void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, const float beta, + float *C, const int ldc); +void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float beta, float *C, const int ldc); +void 
cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, const float beta, + float *C, const int ldc); +void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); +void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); + +void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const double alpha, const double *A, + const int lda, const double *B, const int ldb, + const double beta, double *C, const int ldc); +void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double beta, double *C, const int ldc); +void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO 
Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); +void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); + +void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void *beta, void *C, const int ldc); +void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *beta, void *C, const int ldc); +void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void 
*alpha, const void *A, const int lda, + void *B, const int ldb); + +void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void *beta, void *C, const int ldc); +void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *beta, void *C, const int ldc); +void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); + + +/* + * Routines with prefixes C and Z only + */ +void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void 
cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const void *A, const int lda, + const float beta, void *C, const int ldc); +void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const float beta, + void *C, const int ldc); +void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const void *A, const int lda, + const double beta, void *C, const int ldc); +void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const double beta, + void *C, const int ldc); + +int cblas_errprn(int ierr, int info, char *form, ...); + +#endif /* end #ifdef CBLAS_ENUM_ONLY */ +#endif diff --git a/blas/cgo/dgemmbench_test.go b/blas/cgo/dgemmbench_test.go new file mode 100644 index 00000000..e9bb3939 --- /dev/null +++ b/blas/cgo/dgemmbench_test.go @@ -0,0 +1,47 @@ +package cgo + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +func BenchmarkDgemmSmSmSm(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Sm, Sm, Sm, NT, NT) +} + +func BenchmarkDgemmMedMedMed(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Med, Med, NT, NT) +} + +func BenchmarkDgemmMedLgMed(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Lg, Med, NT, NT) +} + +func BenchmarkDgemmLgLgLg(b *testing.B) { + 
testblas.DgemmBenchmark(b, impl, Lg, Lg, Lg, NT, NT) +} + +func BenchmarkDgemmLgSmLg(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Lg, Sm, Lg, NT, NT) +} + +func BenchmarkDgemmLgLgSm(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Lg, Lg, Sm, NT, NT) +} + +func BenchmarkDgemmHgHgSm(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Hg, Hg, Sm, NT, NT) +} + +func BenchmarkDgemmMedMedMedTNT(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Med, Med, T, NT) +} + +func BenchmarkDgemmMedMedMedNTT(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Med, Med, NT, T) +} + +func BenchmarkDgemmMedMedMedTT(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Med, Med, T, T) +} diff --git a/blas/cgo/dgemvbench_test.go b/blas/cgo/dgemvbench_test.go new file mode 100644 index 00000000..c7617115 --- /dev/null +++ b/blas/cgo/dgemvbench_test.go @@ -0,0 +1,87 @@ +package cgo + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +func BenchmarkDgemvSmSmNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Sm, Sm, 1, 1) +} + +func BenchmarkDgemvSmSmNoTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Sm, Sm, 2, 3) +} + +func BenchmarkDgemvSmSmTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Sm, Sm, 1, 1) +} + +func BenchmarkDgemvSmSmTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Sm, Sm, 2, 3) +} + +func BenchmarkDgemvMedMedNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Med, Med, 1, 1) +} + +func BenchmarkDgemvMedMedNoTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Med, Med, 2, 3) +} + +func BenchmarkDgemvMedMedTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Med, Med, 1, 1) +} + +func BenchmarkDgemvMedMedTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Med, Med, 2, 3) +} + +func BenchmarkDgemvLgLgNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Lg, Lg, 1, 1) +} + +func BenchmarkDgemvLgLgNoTransIncN(b 
*testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Lg, Lg, 2, 3) +} + +func BenchmarkDgemvLgLgTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Lg, Lg, 1, 1) +} + +func BenchmarkDgemvLgLgTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Lg, Lg, 2, 3) +} + +func BenchmarkDgemvLgSmNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Lg, Sm, 1, 1) +} + +func BenchmarkDgemvLgSmNoTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Lg, Sm, 2, 3) +} + +func BenchmarkDgemvLgSmTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Lg, Sm, 1, 1) +} + +func BenchmarkDgemvLgSmTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Lg, Sm, 2, 3) +} + +func BenchmarkDgemvSmLgNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Sm, Lg, 1, 1) +} + +func BenchmarkDgemvSmLgNoTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Sm, Lg, 2, 3) +} + +func BenchmarkDgemvSmLgTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Sm, Lg, 1, 1) +} + +func BenchmarkDgemvSmLgTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Sm, Lg, 2, 3) +} diff --git a/blas/cgo/dgerbench_test.go b/blas/cgo/dgerbench_test.go new file mode 100644 index 00000000..2656f2e4 --- /dev/null +++ b/blas/cgo/dgerbench_test.go @@ -0,0 +1,47 @@ +package cgo + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +func BenchmarkDgerSmSmInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Sm, Sm, 1, 1) +} + +func BenchmarkDgerSmSmIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Sm, Sm, 2, 3) +} + +func BenchmarkDgerMedMedInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Med, Med, 1, 1) +} + +func BenchmarkDgerMedMedIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Med, Med, 2, 3) +} + +func BenchmarkDgerLgLgInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Lg, Lg, 1, 1) +} + +func BenchmarkDgerLgLgIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Lg, Lg, 2, 3) +} + 
+func BenchmarkDgerLgSmInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Lg, Sm, 1, 1) +} + +func BenchmarkDgerLgSmIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Lg, Sm, 2, 3) +} + +func BenchmarkDgerSmLgInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Sm, Lg, 1, 1) +} + +func BenchmarkDgerSmLgIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Sm, Lg, 2, 3) +} diff --git a/blas/cgo/doc.go b/blas/cgo/doc.go new file mode 100644 index 00000000..4e3efcf2 --- /dev/null +++ b/blas/cgo/doc.go @@ -0,0 +1,95 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run generate_blas.go + +// Ensure changes made to blas/cgo are reflected in blas/native where relevant. + +/* +Package cgo provides bindings to a C BLAS library. This wrapper interface +panics when the input arguments are invalid as per the standard, for example +if a vector increment is zero. Please note that the treatment of NaN values +is not specified, and differs among the BLAS implementations. +github.com/gonum/blas/blas64 provides helpful wrapper functions to the BLAS +interface. The rest of this text describes the layout of the data for the input types. + +Please note that in the function documentation, x[i] refers to the i^th element +of the vector, which will be different from the i^th element of the slice if +incX != 1. + +Vector arguments are effectively strided slices. They have two input arguments, +a number of elements, n, and an increment, incX. The increment specifies the +distance between elements of the vector. The actual Go slice may be longer +than necessary. +The increment may be positive or negative, except in functions with only +a single vector argument where the increment may only be positive. If the increment +is negative, s[0] is the last element in the slice. 
Note that this is not the same +as counting backward from the end of the slice, as len(s) may be longer than +necessary. So, for example, if n = 5 and incX = 3, the elements of s are + [0 * * 1 * * 2 * * 3 * * 4 * * * ...] +where ∗ elements are never accessed. If incX = -3, the same elements are +accessed, just in reverse order (4, 3, 2, 1, 0). + +Dense matrices are specified by a number of rows, a number of columns, and a stride. +The stride specifies the number of entries in the slice between the first element +of successive rows. The stride must be at least as large as the number of columns +but may be longer. + [a00 ... a0n a0* ... a1stride-1 a21 ... amn am* ... amstride-1] +Thus, dense[i*ld + j] refers to the {i, j}th element of the matrix. + +Symmetric and triangular matrices (non-packed) are stored identically to Dense, +except that only elements in one triangle of the matrix are accessed. + +Packed symmetric and packed triangular matrices are laid out with the entries +condensed such that all of the unreferenced elements are removed. So, the upper triangular +matrix + [ + 1 2 3 + 0 4 5 + 0 0 6 + ] +and the lower-triangular matrix + [ + 1 0 0 + 2 3 0 + 4 5 6 + ] +will both be compacted as [1 2 3 4 5 6]. The (i, j) element of the original +dense matrix can be found at element i*n - (i-1)*i/2 + j for upper triangular, +and at element i * (i+1) /2 + j for lower triangular. + +Banded matrices are laid out in a compact format, constructed by removing the +zeros in the rows and aligning the diagonals. For example, the matrix + [ + 1 2 3 0 0 0 + 4 5 6 7 0 0 + 0 8 9 10 11 0 + 0 0 12 13 14 15 + 0 0 0 16 17 18 + 0 0 0 0 19 20 + ] + +implicitly becomes (∗ entries are never accessed) + [ + * 1 2 3 + 4 5 6 7 + 8 9 10 11 + 12 13 14 15 + 16 17 18 * + 19 20 * * + ] +which is given to the BLAS routine as [∗ 1 2 3 4 ...]. 
+ +See http://www.crest.iu.edu/research/mtl/reference/html/banded.html +for more information + +*/ +package cgo + +// BUG(btracey): The cgo package is intrinsically dependent on the underlying C +// implementation. The BLAS standard is silent on a number of behaviors, including +// but not limited to how NaN values are treated. For this reason the result of +// computations performed by the cgo BLAS package may disagree with the results +// produced by the native BLAS package. The cgo package is tested against OpenBLAS; +// use of other backing BLAS C libraries may result in test failure because of this. diff --git a/blas/cgo/dtrmvbench_test.go b/blas/cgo/dtrmvbench_test.go new file mode 100644 index 00000000..2b1c54ed --- /dev/null +++ b/blas/cgo/dtrmvbench_test.go @@ -0,0 +1,54 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.7 + +package cgo + +import ( + "strconv" + "testing" + + "github.com/gonum/blas" + "github.com/gonum/blas/testblas" +) + +func BenchmarkDtrmv(b *testing.B) { + for _, n := range []int{testblas.MediumMat, testblas.LargeMat} { + for _, incX := range []int{1, 5} { + for _, uplo := range []blas.Uplo{blas.Upper, blas.Lower} { + for _, trans := range []blas.Transpose{blas.NoTrans, blas.Trans} { + for _, unit := range []blas.Diag{blas.NonUnit, blas.Unit} { + var str string + if n == testblas.MediumMat { + str += "Med" + } else if n == testblas.LargeMat { + str += "Large" + } + str += "_Inc" + strconv.Itoa(incX) + if uplo == blas.Upper { + str += "_UP" + } else { + str += "_LO" + } + if trans == blas.NoTrans { + str += "_NT" + } else { + str += "_TR" + } + if unit == blas.NonUnit { + str += "_NU" + } else { + str += "_UN" + } + lda := n + b.Run(str, func(b *testing.B) { + testblas.DtrmvBenchmark(b, Implementation{}, n, lda, incX, uplo, trans, unit) + }) + } + } + } + } + } +} diff --git a/blas/cgo/generate_blas.go 
b/blas/cgo/generate_blas.go new file mode 100644 index 00000000..bb3d0366 --- /dev/null +++ b/blas/cgo/generate_blas.go @@ -0,0 +1,978 @@ +// Copyright ©2016 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// generate_blas creates a blas.go file from the provided C header file +// with optionally added documentation from the documentation package. +package main + +import ( + "bytes" + "fmt" + "go/ast" + "go/format" + "io/ioutil" + "log" + "strings" + "text/template" + + "github.com/cznic/cc" + + "github.com/gonum/internal/binding" +) + +const ( + header = "cblas.h" + documentation = "../native" + target = "blas.go" + + typ = "Implementation" + + prefix = "cblas_" + + warning = "Float32 implementations are autogenerated and not directly tested." +) + +const ( + cribDocs = true + elideRepeat = true + noteOrigin = true + separateFuncs = false +) + +var skip = map[string]bool{ + "cblas_errprn": true, + "cblas_srotg": true, + "cblas_srotmg": true, + "cblas_srotm": true, + "cblas_drotg": true, + "cblas_drotmg": true, + "cblas_drotm": true, + "cblas_crotg": true, + "cblas_zrotg": true, + "cblas_cdotu_sub": true, + "cblas_cdotc_sub": true, + "cblas_zdotu_sub": true, + "cblas_zdotc_sub": true, + + // ATLAS extensions. 
+ "cblas_csrot": true, + "cblas_zdrot": true, +} + +var cToGoType = map[string]string{ + "int": "int", + "float": "float32", + "double": "float64", +} + +var blasEnums = map[string]*template.Template{ + "CBLAS_ORDER": template.Must(template.New("order").Parse("order")), + "CBLAS_DIAG": template.Must(template.New("diag").Parse("blas.Diag")), + "CBLAS_TRANSPOSE": template.Must(template.New("trans").Parse("blas.Transpose")), + "CBLAS_UPLO": template.Must(template.New("uplo").Parse("blas.Uplo")), + "CBLAS_SIDE": template.Must(template.New("side").Parse("blas.Side")), +} + +var cgoEnums = map[string]*template.Template{ + "CBLAS_ORDER": template.Must(template.New("order").Parse("C.enum_CBLAS_ORDER(rowMajor)")), + "CBLAS_DIAG": template.Must(template.New("diag").Parse("C.enum_CBLAS_DIAG({{.}})")), + "CBLAS_TRANSPOSE": template.Must(template.New("trans").Parse("C.enum_CBLAS_TRANSPOSE({{.}})")), + "CBLAS_UPLO": template.Must(template.New("uplo").Parse("C.enum_CBLAS_UPLO({{.}})")), + "CBLAS_SIDE": template.Must(template.New("side").Parse("C.enum_CBLAS_SIDE({{.}})")), +} + +var cgoTypes = map[binding.TypeKey]*template.Template{ + {Kind: cc.Void, IsPointer: true}: template.Must(template.New("void*").Parse( + `unsafe.Pointer(&{{.}}{{if eq . "alpha" "beta"}}{{else}}[0]{{end}})`, + )), +} + +var ( + complex64Type = map[binding.TypeKey]*template.Template{ + {Kind: cc.Void, IsPointer: true}: template.Must(template.New("void*").Parse( + `{{if eq . "alpha" "beta"}}complex64{{else}}[]complex64{{end}}`, + ))} + + complex128Type = map[binding.TypeKey]*template.Template{ + {Kind: cc.Void, IsPointer: true}: template.Must(template.New("void*").Parse( + `{{if eq . 
"alpha" "beta"}}complex128{{else}}[]complex128{{end}}`, + ))} +) + +var names = map[string]string{ + "uplo": "ul", + "trans": "t", + "transA": "tA", + "transB": "tB", + "side": "s", + "diag": "d", +} + +func shorten(n string) string { + s, ok := names[n] + if ok { + return s + } + return n +} + +func main() { + decls, err := binding.Declarations(header) + if err != nil { + log.Fatal(err) + } + var docs map[string]map[string][]*ast.Comment + if cribDocs { + docs, err = binding.DocComments(documentation) + if err != nil { + log.Fatal(err) + } + } + + var buf bytes.Buffer + + h, err := template.New("handwritten").Parse(handwritten) + if err != nil { + log.Fatal(err) + } + err = h.Execute(&buf, header) + if err != nil { + log.Fatal(err) + } + + var n int + for _, d := range decls { + if !strings.HasPrefix(d.Name, prefix) || skip[d.Name] { + continue + } + if n != 0 && (separateFuncs || cribDocs) { + buf.WriteByte('\n') + } + n++ + goSignature(&buf, d, docs[typ]) + if noteOrigin { + fmt.Fprintf(&buf, "\t// declared at %s %s %s ...\n\n", d.Position(), d.Return, d.Name) + } + parameterChecks(&buf, d, parameterCheckRules) + buf.WriteByte('\t') + cgoCall(&buf, d) + buf.WriteString("}\n") + } + + b, err := format.Source(buf.Bytes()) + if err != nil { + log.Fatal(err) + } + err = ioutil.WriteFile(target, b, 0664) + if err != nil { + log.Fatal(err) + } +} + +func goSignature(buf *bytes.Buffer, d binding.Declaration, docs map[string][]*ast.Comment) { + blasName := strings.TrimPrefix(d.Name, prefix) + goName := binding.UpperCaseFirst(blasName) + + if docs != nil { + if doc, ok := docs[goName]; ok { + if strings.Contains(doc[len(doc)-1].Text, warning) { + doc = doc[:len(doc)-2] + } + for _, c := range doc { + buf.WriteString(c.Text) + buf.WriteByte('\n') + } + } + } + + parameters := d.Parameters() + + var voidPtrType map[binding.TypeKey]*template.Template + for _, p := range parameters { + if p.Kind() == cc.Ptr && p.Elem().Kind() == cc.Void { + switch { + case blasName[0] == 
'c', blasName[1] == 'c' && blasName[0] != 'z': + voidPtrType = complex64Type + case blasName[0] == 'z', blasName[1] == 'z': + voidPtrType = complex128Type + } + break + } + } + + fmt.Fprintf(buf, "func (%s) %s(", typ, goName) + c := 0 + for i, p := range parameters { + if p.Kind() == cc.Enum && binding.GoTypeForEnum(p.Type(), "", blasEnums) == "order" { + continue + } + if c != 0 { + buf.WriteString(", ") + } + c++ + + n := shorten(binding.LowerCaseFirst(p.Name())) + var this, next string + + if p.Kind() == cc.Enum { + this = binding.GoTypeForEnum(p.Type(), n, blasEnums) + } else { + this = binding.GoTypeFor(p.Type(), n, voidPtrType) + } + + if elideRepeat && i < len(parameters)-1 && p.Type().Kind() == parameters[i+1].Type().Kind() { + p := parameters[i+1] + n := shorten(binding.LowerCaseFirst(p.Name())) + if p.Kind() == cc.Enum { + next = binding.GoTypeForEnum(p.Type(), n, blasEnums) + } else { + next = binding.GoTypeFor(p.Type(), n, voidPtrType) + } + } + if next == this { + buf.WriteString(n) + } else { + fmt.Fprintf(buf, "%s %s", n, this) + } + } + if d.Return.Kind() != cc.Void { + fmt.Fprintf(buf, ") %s {\n", cToGoType[d.Return.String()]) + } else { + buf.WriteString(") {\n") + } +} + +func parameterChecks(buf *bytes.Buffer, d binding.Declaration, rules []func(*bytes.Buffer, binding.Declaration, binding.Parameter) bool) { + done := make(map[int]bool) + for _, p := range d.Parameters() { + for i, r := range rules { + if done[i] { + continue + } + done[i] = r(buf, d, p) + } + } +} + +func cgoCall(buf *bytes.Buffer, d binding.Declaration) { + if d.Return.Kind() != cc.Void { + fmt.Fprintf(buf, "return %s(", cToGoType[d.Return.String()]) + } + fmt.Fprintf(buf, "C.%s(", d.Name) + for i, p := range d.Parameters() { + if i != 0 { + buf.WriteString(", ") + } + if p.Type().Kind() == cc.Enum { + buf.WriteString(binding.CgoConversionForEnum(shorten(binding.LowerCaseFirst(p.Name())), p.Type(), cgoEnums)) + } else { + 
buf.WriteString(binding.CgoConversionFor(shorten(binding.LowerCaseFirst(p.Name())), p.Type(), cgoTypes)) + } + } + if d.Return.Kind() != cc.Void { + buf.WriteString(")") + } + buf.WriteString(")\n") +} + +var parameterCheckRules = []func(*bytes.Buffer, binding.Declaration, binding.Parameter) bool{ + trans, + uplo, + diag, + side, + + shape, + apShape, + zeroInc, + sidedShape, + mvShape, + rkShape, + gemmShape, + scalShape, + amaxShape, + nrmSumShape, + vectorShape, + othersShape, + + noWork, +} + +func amaxShape(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + switch d.Name { + case "cblas_isamax", "cblas_idamax", "cblas_icamax", "cblas_izamax": + default: + return true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. + } + + fmt.Fprint(buf, ` if n == 0 || incX < 0 { + return -1 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } +`) + return true +} + +func apShape(buf *bytes.Buffer, _ binding.Declaration, p binding.Parameter) bool { + n := binding.LowerCaseFirst(p.Name()) + if n != "ap" { + return false + } + fmt.Fprint(buf, ` if n*(n+1)/2 > len(ap) { + panic("blas: index of ap out of range") + } +`) + return true +} + +func diag(buf *bytes.Buffer, _ binding.Declaration, p binding.Parameter) bool { + if p.Name() != "Diag" { + return false + } + fmt.Fprint(buf, ` if d != blas.NonUnit && d != blas.Unit { + panic("blas: illegal diagonal") + } +`) + return true +} + +func gemmShape(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + switch d.Name { + case "cblas_sgemm", "cblas_dgemm", "cblas_cgemm", "cblas_zgemm": + default: + return true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. 
+ } + + fmt.Fprint(buf, ` var rowA, colA, rowB, colB int + if tA == blas.NoTrans { + rowA, colA = m, k + } else { + rowA, colA = k, m + } + if tB == blas.NoTrans { + rowB, colB = k, n + } else { + rowB, colB = n, k + } + if lda*(rowA-1)+colA > len(a) || lda < max(1, colA) { + panic("blas: index of a out of range") + } + if ldb*(rowB-1)+colB > len(b) || ldb < max(1, colB) { + panic("blas: index of b out of range") + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } +`) + return true +} + +func mvShape(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + switch d.Name { + case "cblas_sgbmv", "cblas_dgbmv", "cblas_cgbmv", "cblas_zgbmv", + "cblas_sgemv", "cblas_dgemv", "cblas_cgemv", "cblas_zgemv": + default: + return true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. + } + + fmt.Fprint(buf, ` var lenX, lenY int + if tA == blas.NoTrans { + lenX, lenY = n, m + } else { + lenX, lenY = m, n + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic("blas: y index out of range") + } +`) + return true +} + +func noWork(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + var hasN, hasLda, hasLdb bool + for _, p := range d.Parameters() { + switch shorten(binding.LowerCaseFirst(p.Name())) { + case "n": + hasN = true + case "lda": + hasLda = true + case "ldb": + hasLdb = true + } + } + if !hasN || hasLda || hasLdb { + return true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. 
+ } + + var value string + switch d.Return.String() { + case "int": + value = " -1" + case "float", "double": + value = " 0" + } + fmt.Fprintf(buf, ` if n == 0 { + return%s + } +`, value) + return true +} + +func nrmSumShape(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + switch d.Name { + case "cblas_snrm2", "cblas_dnrm2", "cblas_scnrm2", "cblas_dznrm2", + "cblas_sasum", "cblas_dasum", "cblas_scasum", "cblas_dzasum": + default: + return true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. + } + + fmt.Fprint(buf, ` if incX < 0 { + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } +`) + return true +} + +func rkShape(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + switch d.Name { + case "cblas_ssyrk", "cblas_dsyrk", "cblas_csyrk", "cblas_zsyrk", + "cblas_ssyr2k", "cblas_dsyr2k", "cblas_csyr2k", "cblas_zsyr2k", + "cblas_cherk", "cblas_zherk", "cblas_cher2k", "cblas_zher2k": + default: + return true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. 
+ } + + fmt.Fprint(buf, ` var row, col int + if t == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } +`) + has := make(map[string]bool) + for _, p := range d.Parameters() { + if p.Kind() != cc.Ptr { + continue + } + has[shorten(binding.LowerCaseFirst(p.Name()))] = true + } + for _, label := range []string{"a", "b"} { + if has[label] { + fmt.Fprintf(buf, ` if ld%[1]s*(row-1)+col > len(%[1]s) || ld%[1]s < max(1, col) { + panic("blas: index of %[1]s out of range") + } +`, label) + } + } + if has["c"] { + fmt.Fprint(buf, ` if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } +`) + } + + return true +} + +func scalShape(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + switch d.Name { + case "cblas_sscal", "cblas_dscal", "cblas_cscal", "cblas_zscal", "cblas_csscal": + default: + return true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. + } + + fmt.Fprint(buf, ` if incX < 0 { + return + } + if incX > 0 && (n-1)*incX >= len(x) { + panic("blas: x index out of range") + } +`) + return true +} + +func shape(buf *bytes.Buffer, _ binding.Declaration, p binding.Parameter) bool { + switch n := binding.LowerCaseFirst(p.Name()); n { + case "m", "n", "k", "kL", "kU": + fmt.Fprintf(buf, ` if %[1]s < 0 { + panic("blas: %[1]s < 0") + } +`, n) + return false + } + return false +} + +func side(buf *bytes.Buffer, _ binding.Declaration, p binding.Parameter) bool { + if p.Name() != "Side" { + return false + } + fmt.Fprint(buf, ` if s != blas.Left && s != blas.Right { + panic("blas: illegal side") + } +`) + return true +} + +func sidedShape(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + var hasS, hasA, hasB, hasC bool + for _, p := range d.Parameters() { + switch shorten(binding.LowerCaseFirst(p.Name())) { + case "s": + hasS = true + case "a": + hasA = true + case "b": + hasB = true + case "c": + hasC = true + } + } + if !hasS { + return 
true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. + } + + if hasA && hasB { + fmt.Fprint(buf, ` var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic("blas: index of a out of range") + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic("blas: index of b out of range") + } +`) + } else { + return true + } + if hasC { + fmt.Fprint(buf, ` if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic("blas: index of c out of range") + } +`) + } + + return true +} + +func trans(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + switch n := shorten(binding.LowerCaseFirst(p.Name())); n { + case "t", "tA", "tB": + switch { + case strings.HasPrefix(d.Name, "cblas_ch"), strings.HasPrefix(d.Name, "cblas_zh"): + fmt.Fprintf(buf, ` if %[1]s != blas.NoTrans && %[1]s != blas.ConjTrans { + panic("blas: illegal transpose") + } +`, n) + case strings.HasPrefix(d.Name, "cblas_cs"), strings.HasPrefix(d.Name, "cblas_zs"): + fmt.Fprintf(buf, ` if %[1]s != blas.NoTrans && %[1]s != blas.Trans { + panic("blas: illegal transpose") + } +`, n) + default: + fmt.Fprintf(buf, ` if %[1]s != blas.NoTrans && %[1]s != blas.Trans && %[1]s != blas.ConjTrans { + panic("blas: illegal transpose") + } +`, n) + } + } + return false +} + +func uplo(buf *bytes.Buffer, _ binding.Declaration, p binding.Parameter) bool { + if p.Name() != "Uplo" { + return false + } + fmt.Fprint(buf, ` if ul != blas.Upper && ul != blas.Lower { + panic("blas: illegal triangle") + } +`) + return true +} + +func vectorShape(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + switch d.Name { + case "cblas_sgbmv", "cblas_dgbmv", "cblas_cgbmv", "cblas_zgbmv", + "cblas_sgemv", "cblas_dgemv", "cblas_cgemv", "cblas_zgemv", + "cblas_sscal", "cblas_dscal", "cblas_cscal", "cblas_zscal", "cblas_csscal", + "cblas_isamax", "cblas_idamax", "cblas_icamax", "cblas_izamax", + "cblas_snrm2", 
"cblas_dnrm2", "cblas_scnrm2", "cblas_dznrm2", + "cblas_sasum", "cblas_dasum", "cblas_scasum", "cblas_dzasum": + return true + } + + var hasN, hasM, hasIncX, hasIncY bool + for _, p := range d.Parameters() { + switch shorten(binding.LowerCaseFirst(p.Name())) { + case "n": + hasN = true + case "m": + hasM = true + case "incX": + hasIncX = true + case "incY": + hasIncY = true + } + } + if !hasN && !hasM { + return true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. + } + + var label string + if hasM { + label = "m" + } else { + label = "n" + } + if hasIncX { + fmt.Fprintf(buf, ` if (incX > 0 && (%[1]s-1)*incX >= len(x)) || (incX < 0 && (1-%[1]s)*incX >= len(x)) { + panic("blas: x index out of range") + } +`, label) + } + if hasIncY { + fmt.Fprint(buf, ` if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } +`) + } + return true +} + +func zeroInc(buf *bytes.Buffer, _ binding.Declaration, p binding.Parameter) bool { + switch n := binding.LowerCaseFirst(p.Name()); n { + case "incX": + fmt.Fprintf(buf, ` if incX == 0 { + panic("blas: zero x index increment") + } +`) + case "incY": + fmt.Fprintf(buf, ` if incY == 0 { + panic("blas: zero y index increment") + } +`) + return true + } + return false +} + +func othersShape(buf *bytes.Buffer, d binding.Declaration, p binding.Parameter) bool { + switch d.Name { + case "cblas_sgemm", "cblas_dgemm", "cblas_cgemm", "cblas_zgemm", + "cblas_ssyrk", "cblas_dsyrk", "cblas_csyrk", "cblas_zsyrk", + "cblas_ssyr2k", "cblas_dsyr2k", "cblas_csyr2k", "cblas_zsyr2k", + "cblas_cherk", "cblas_zherk", "cblas_cher2k", "cblas_zher2k": + return true + } + + has := make(map[string]bool) + for _, p := range d.Parameters() { + has[shorten(binding.LowerCaseFirst(p.Name()))] = true + } + if !has["a"] || has["s"] { + return true + } + + if d.CParameters[len(d.CParameters)-1] != p.Parameter { + return false // Come back later. 
+ } + + switch { + case has["kL"] && has["kU"]: + fmt.Fprintf(buf, ` if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 { + panic("blas: index of a out of range") + } +`) + case has["m"]: + fmt.Fprintf(buf, ` if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } +`) + case has["k"]: + fmt.Fprintf(buf, ` if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic("blas: index of a out of range") + } +`) + default: + fmt.Fprintf(buf, ` if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic("blas: index of a out of range") + } +`) + } + + return true +} + +const handwritten = `// Do not manually edit this file. It was created by the generate_blas.go from {{.}}. + +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cgo + +/* +#cgo CFLAGS: -g -O2 +#include "{{.}}" +*/ +import "C" + +import ( + "unsafe" + + "github.com/gonum/blas" +) + +// Type check assertions: +var ( + _ blas.Float32 = Implementation{} + _ blas.Float64 = Implementation{} + _ blas.Complex64 = Implementation{} + _ blas.Complex128 = Implementation{} +) + +// Type order is used to specify the matrix storage format. We still interact with +// an API that allows client calls to specify order, so this is here to document that fact. +type order int + +const ( + rowMajor order = 101 + iota +) + +func max(a, b int) int { + if a > b { + return a + } + return b +} + +type Implementation struct{} + +// Special cases... 
+ +type srotmParams struct { + flag float32 + h [4]float32 +} + +type drotmParams struct { + flag float64 + h [4]float64 +} + +func (Implementation) Srotg(a float32, b float32) (c float32, s float32, r float32, z float32) { + C.cblas_srotg((*C.float)(&a), (*C.float)(&b), (*C.float)(&c), (*C.float)(&s)) + return c, s, a, b +} +func (Implementation) Srotmg(d1 float32, d2 float32, b1 float32, b2 float32) (p blas.SrotmParams, rd1 float32, rd2 float32, rb1 float32) { + var pi srotmParams + C.cblas_srotmg((*C.float)(&d1), (*C.float)(&d2), (*C.float)(&b1), C.float(b2), (*C.float)(unsafe.Pointer(&pi))) + return blas.SrotmParams{Flag: blas.Flag(pi.flag), H: pi.h}, d1, d2, b1 +} +func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if p.Flag < blas.Identity || p.Flag > blas.Diagonal { + panic("blas: illegal blas.Flag value") + } + if n == 0 { + return + } + pi := srotmParams{ + flag: float32(p.Flag), + h: p.H, + } + C.cblas_srotm(C.int(n), (*C.float)(&x[0]), C.int(incX), (*C.float)(&y[0]), C.int(incY), (*C.float)(unsafe.Pointer(&pi))) +} +func (Implementation) Drotg(a float64, b float64) (c float64, s float64, r float64, z float64) { + C.cblas_drotg((*C.double)(&a), (*C.double)(&b), (*C.double)(&c), (*C.double)(&s)) + return c, s, a, b +} +func (Implementation) Drotmg(d1 float64, d2 float64, b1 float64, b2 float64) (p blas.DrotmParams, rd1 float64, rd2 float64, rb1 float64) { + var pi drotmParams + C.cblas_drotmg((*C.double)(&d1), (*C.double)(&d2), (*C.double)(&b1), C.double(b2), (*C.double)(unsafe.Pointer(&pi))) + return 
blas.DrotmParams{Flag: blas.Flag(pi.flag), H: pi.h}, d1, d2, b1 +} +func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if p.Flag < blas.Identity || p.Flag > blas.Diagonal { + panic("blas: illegal blas.Flag value") + } + if n == 0 { + return + } + pi := drotmParams{ + flag: float64(p.Flag), + h: p.H, + } + C.cblas_drotm(C.int(n), (*C.double)(&x[0]), C.int(incX), (*C.double)(&y[0]), C.int(incY), (*C.double)(unsafe.Pointer(&pi))) +} +func (Implementation) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + C.cblas_cdotu_sub(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&dotu)) + return dotu +} +func (Implementation) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= 
len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + C.cblas_cdotc_sub(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&dotc)) + return dotc +} +func (Implementation) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + C.cblas_zdotu_sub(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&dotu)) + return dotu +} +func (Implementation) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128) { + if n < 0 { + panic("blas: n < 0") + } + if incX == 0 { + panic("blas: zero x index increment") + } + if incY == 0 { + panic("blas: zero y index increment") + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic("blas: x index out of range") + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic("blas: y index out of range") + } + if n == 0 { + return 0 + } + C.cblas_zdotc_sub(C.int(n), unsafe.Pointer(&x[0]), C.int(incX), unsafe.Pointer(&y[0]), C.int(incY), unsafe.Pointer(&dotc)) + return dotc +} + +// Generated cases ... 
// randomSlice returns a slice of length l*|idx| in which every element is a
// fresh value from rand.Float64. A negative idx is replaced by its absolute
// value before the length is computed.
func randomSlice(l, idx int) []float64 {
	stride := idx
	if stride < 0 {
		stride = -stride
	}
	total := l * stride
	out := make([]float64, total)
	for pos := 0; pos < total; pos++ {
		out[pos] = rand.Float64()
	}
	return out
}
randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := 
randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +/* ------------------ */ +func benchmarkDnrm2(b *testing.B, n int, x []float64, incX int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dnrm2(n, x, incX) + } +} + +func BenchmarkDnrm2SmallUnitaryInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2SmallPosInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2MediumUnitaryInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2MediumPosInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2LargeUnitaryInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2LargePosInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2HugeUnitaryInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2HugePosInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +/* ------------------ */ +func benchmarkDasum(b *testing.B, n int, x []float64, incX int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dasum(n, x, incX) + } +} + +func BenchmarkDasumSmallUnitaryInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + + 
benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumSmallPosInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumMediumUnitaryInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumMediumPosInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumLargeUnitaryInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumLargePosInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumHugeUnitaryInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumHugePosInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +/* ------------------ */ +func benchmarkIdamax(b *testing.B, n int, x []float64, incX int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Idamax(n, x, incX) + } +} + +func BenchmarkIdamaxSmallUnitaryInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxSmallPosInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxMediumUnitaryInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxMediumPosInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxLargeUnitaryInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + 
benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxLargePosInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxHugeUnitaryInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxHugePosInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +/* ------------------ */ +func benchmarkDswap(b *testing.B, n int, x []float64, incX int, y []float64, incY int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dswap(n, x, incX, y, incY) + } +} + +func BenchmarkDswapSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := 
randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +/* ------------------ */ +func benchmarkDcopy(b *testing.B, n 
int, x []float64, incX int, y []float64, incY int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dcopy(n, x, incX, y, incY) + } +} + +func BenchmarkDcopySmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopySmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopySmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopySmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func 
BenchmarkDcopyLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +/* ------------------ */ +func benchmarkDaxpy(b *testing.B, n int, alpha float64, x []float64, incX int, y []float64, incY int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Daxpy(n, alpha, x, incX, y, incY) + } +} + +func BenchmarkDaxpySmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpySmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, 
incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpySmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpySmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := 
randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +/* ------------------ */ +func benchmarkDrot(b *testing.B, n int, x []float64, incX int, y []float64, incY int, c, s float64) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Drot(n, x, incX, y, incY, c, s) + } +} + +func BenchmarkDrotSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := 
randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, 
incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +/* ------------------ */ +func benchmarkDrotmOffDia(b *testing.B, n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Drotm(n, x, incX, y, incY, p) + } +} + +func BenchmarkDrotmOffDiaSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + 
x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, 
incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := 
blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +/* ------------------ */ +func benchmarkDrotmDia(b *testing.B, n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Drotm(n, x, incX, y, incY, p) + } +} + +func BenchmarkDrotmDiaSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := 
randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaLargeBothInc(b *testing.B) { + n := 
LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Diagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +/* ------------------ */ +func benchmarkDrotmResc(b *testing.B, n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Drotm(n, x, incX, y, incY, p) + } +} + +func BenchmarkDrotmRescSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescSmallIncUni(b *testing.B) { + 
n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} 
+ benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + 
p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.Rescaling, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +/* ------------------ */ +func benchmarkDscal(b *testing.B, n int, alpha float64, x []float64, incX int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dscal(n, alpha, x, incX) + } +} + +func BenchmarkDscalSmallUnitaryInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalSmallPosInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalMediumUnitaryInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalMediumPosInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalLargeUnitaryInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalLargePosInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalHugeUnitaryInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalHugePosInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) 
+} + +/* ------------------ */ diff --git a/blas/cgo/level1double_test.go b/blas/cgo/level1double_test.go new file mode 100644 index 00000000..871c6220 --- /dev/null +++ b/blas/cgo/level1double_test.go @@ -0,0 +1,57 @@ +package cgo + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +var impl Implementation + +func TestDasum(t *testing.T) { + testblas.DasumTest(t, impl) +} + +func TestDaxpy(t *testing.T) { + testblas.DaxpyTest(t, impl) +} + +func TestDdot(t *testing.T) { + testblas.DdotTest(t, impl) +} + +func TestDnrm2(t *testing.T) { + testblas.Dnrm2Test(t, impl) +} + +func TestIdamax(t *testing.T) { + testblas.IdamaxTest(t, impl) +} + +func TestDswap(t *testing.T) { + testblas.DswapTest(t, impl) +} + +func TestDcopy(t *testing.T) { + testblas.DcopyTest(t, impl) +} + +func TestDrotg(t *testing.T) { + testblas.DrotgTest(t, impl) +} + +func TestDrotmg(t *testing.T) { + testblas.DrotmgTest(t, impl) +} + +func TestDrot(t *testing.T) { + testblas.DrotTest(t, impl) +} + +func TestDrotm(t *testing.T) { + testblas.DrotmTest(t, impl) +} + +func TestDscal(t *testing.T) { + testblas.DscalTest(t, impl) +} diff --git a/blas/cgo/level2double_test.go b/blas/cgo/level2double_test.go new file mode 100644 index 00000000..41d29fb9 --- /dev/null +++ b/blas/cgo/level2double_test.go @@ -0,0 +1,75 @@ +package cgo + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +func TestDgemv(t *testing.T) { + testblas.DgemvTest(t, impl) +} + +func TestDger(t *testing.T) { + testblas.DgerTest(t, impl) +} + +func TestDtbmv(t *testing.T) { + testblas.DtbmvTest(t, impl) +} + +func TestDtxmv(t *testing.T) { + testblas.DtxmvTest(t, impl) +} + +func TestDgbmv(t *testing.T) { + testblas.DgbmvTest(t, impl) +} + +func TestDtbsv(t *testing.T) { + testblas.DtbsvTest(t, impl) +} + +func TestDsbmv(t *testing.T) { + testblas.DsbmvTest(t, impl) +} + +func TestDtrsv(t *testing.T) { + testblas.DtrsvTest(t, impl) +} + +func TestDsyr(t *testing.T) { + testblas.DsyrTest(t, impl) +} + +func 
TestDsymv(t *testing.T) { + testblas.DsymvTest(t, impl) +} + +func TestDtrmv(t *testing.T) { + testblas.DtrmvTest(t, impl) +} + +func TestDsyr2(t *testing.T) { + testblas.Dsyr2Test(t, impl) +} + +func TestDspr2(t *testing.T) { + testblas.Dspr2Test(t, impl) +} + +func TestDspr(t *testing.T) { + testblas.DsprTest(t, impl) +} + +func TestDspmv(t *testing.T) { + testblas.DspmvTest(t, impl) +} + +func TestDtpsv(t *testing.T) { + testblas.DtpsvTest(t, impl) +} + +func TestDtmpv(t *testing.T) { + testblas.DtpmvTest(t, impl) +} diff --git a/blas/cgo/level3double_test.go b/blas/cgo/level3double_test.go new file mode 100644 index 00000000..a35af511 --- /dev/null +++ b/blas/cgo/level3double_test.go @@ -0,0 +1,31 @@ +package cgo + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +func TestDgemm(t *testing.T) { + testblas.TestDgemm(t, impl) +} + +func TestDsymm(t *testing.T) { + testblas.DsymmTest(t, impl) +} + +func TestDtrsm(t *testing.T) { + testblas.DtrsmTest(t, impl) +} + +func TestDsyrk(t *testing.T) { + testblas.DsyrkTest(t, impl) +} + +func TestDsyr2k(t *testing.T) { + testblas.Dsyr2kTest(t, impl) +} + +func TestDtrmm(t *testing.T) { + testblas.DtrmmTest(t, impl) +} diff --git a/blas/native/bench_test.go b/blas/native/bench_test.go new file mode 100644 index 00000000..6a3af84b --- /dev/null +++ b/blas/native/bench_test.go @@ -0,0 +1,22 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "github.com/gonum/blas" + "github.com/gonum/blas/testblas" +) + +const ( + Sm = testblas.SmallMat + Med = testblas.MediumMat + Lg = testblas.LargeMat + Hg = testblas.HugeMat +) + +const ( + T = blas.Trans + NT = blas.NoTrans +) diff --git a/blas/native/dgemm.go b/blas/native/dgemm.go new file mode 100644 index 00000000..69dfbfb4 --- /dev/null +++ b/blas/native/dgemm.go @@ -0,0 +1,276 @@ +// Copyright ©2014 The gonum Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "runtime" + "sync" + + "github.com/gonum/blas" + "github.com/gonum/internal/asm/f64" +) + +// Dgemm computes +// C = beta * C + alpha * A * B, +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed. +func (Implementation) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans { + panic(badTranspose) + } + aTrans := tA == blas.Trans || tA == blas.ConjTrans + if aTrans { + checkMatrix64(k, m, a, lda) + } else { + checkMatrix64(m, k, a, lda) + } + bTrans := tB == blas.Trans || tB == blas.ConjTrans + if bTrans { + checkMatrix64(n, k, b, ldb) + } else { + checkMatrix64(k, n, b, ldb) + } + checkMatrix64(m, n, c, ldc) + + // scale c + if beta != 1 { + if beta == 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + } + } + + dgemmParallel(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha) +} + +func dgemmParallel(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // dgemmParallel computes a parallel matrix multiplication by partitioning + // a and b into sub-blocks, and updating c with the multiplication of the sub-block + // In all cases, + // A = [ A_11 A_12 ... A_1j + // A_21 A_22 ... A_2j + // ... + // A_i1 A_i2 ... A_ij] + // + // and same for B. All of the submatrix sizes are blockSize×blockSize except + // at the edges. 
+ // + // In all cases, there is one dimension for each matrix along which + // C must be updated sequentially. + // Cij = \sum_k Aik Bkj, (A * B) + // Cij = \sum_k Aki Bkj, (A^T * B) + // Cij = \sum_k Aik Bjk, (A * B^T) + // Cij = \sum_k Aki Bjk, (A^T * B^T) + // + // This code computes one {i, j} block sequentially along the k dimension, + // and computes all of the {i, j} blocks concurrently. This + // partitioning allows Cij to be updated in-place without race-conditions. + // Instead of launching a goroutine for each possible concurrent computation, + // a number of worker goroutines are created and a channel is used to pass + // available cases; a WaitGroup signals completion. + // + // http://alexkr.com/docs/matrixmult.pdf is a good reference on matrix-matrix + // multiplies, though this code does not copy matrices to attempt to eliminate + // cache misses. + + maxKLen := k + parBlocks := blocks(m, blockSize) * blocks(n, blockSize) + if parBlocks < minParBlock { + // The matrix multiplication is small in the dimensions where it can be + // computed concurrently. Just do it in serial. + dgemmSerial(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + } + + nWorkers := runtime.GOMAXPROCS(0) + if parBlocks < nWorkers { + nWorkers = parBlocks + } + // There is a tradeoff between the workers having to wait for work + // and a large buffer making operations slow. + buf := buffMul * nWorkers + if buf > parBlocks { + buf = parBlocks + } + + sendChan := make(chan subMul, buf) + + // Launch workers. A worker receives an {i, j} submatrix of c, and computes + // A_ik B_kj (or the transposed version) storing the result in c_ij. When the + // channel is finally closed, it signals to the waitgroup that it has finished + // computing. + var wg sync.WaitGroup + for i := 0; i < nWorkers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + // Make local copies of otherwise global variables to reduce shared memory. + // This has a noticeable effect on benchmarks in some cases. 
+ alpha := alpha + aTrans := aTrans + bTrans := bTrans + m := m + n := n + for sub := range sendChan { + i := sub.i + j := sub.j + leni := blockSize + if i+leni > m { + leni = m - i + } + lenj := blockSize + if j+lenj > n { + lenj = n - j + } + + cSub := sliceView64(c, ldc, i, j, leni, lenj) + + // Compute A_ik B_kj for all k + for k := 0; k < maxKLen; k += blockSize { + lenk := blockSize + if k+lenk > maxKLen { + lenk = maxKLen - k + } + var aSub, bSub []float64 + if aTrans { + aSub = sliceView64(a, lda, k, i, lenk, leni) + } else { + aSub = sliceView64(a, lda, i, k, leni, lenk) + } + if bTrans { + bSub = sliceView64(b, ldb, j, k, lenj, lenk) + } else { + bSub = sliceView64(b, ldb, k, j, lenk, lenj) + } + dgemmSerial(aTrans, bTrans, leni, lenj, lenk, aSub, lda, bSub, ldb, cSub, ldc, alpha) + } + } + }() + } + + // Send out all of the {i, j} subblocks for computation. + for i := 0; i < m; i += blockSize { + for j := 0; j < n; j += blockSize { + sendChan <- subMul{ + i: i, + j: j, + } + } + } + close(sendChan) + wg.Wait() +} + +// dgemmSerial is serial matrix multiply +func dgemmSerial(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + switch { + case !aTrans && !bTrans: + dgemmSerialNotNot(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + case aTrans && !bTrans: + dgemmSerialTransNot(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + case !aTrans && bTrans: + dgemmSerialNotTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + case aTrans && bTrans: + dgemmSerialTransTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + default: + panic("unreachable") + } +} + +// dgemmSerial where neither a nor b are transposed +func dgemmSerialNotNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // This style is used instead of the literal [i*stride+j] indexing because it is + // approximately 5 times faster as of go 1.3. 
+ for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for l, v := range a[i*lda : i*lda+k] { + tmp := alpha * v + if tmp != 0 { + f64.AxpyUnitaryTo(ctmp, tmp, b[l*ldb:l*ldb+n], ctmp) + } + } + } +} + +// dgemmSerial where a is transposed and b is not +func dgemmSerialTransNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // This style is used instead of the literal [i*stride+j] indexing because it is + // approximately 5 times faster as of go 1.3. + for l := 0; l < k; l++ { + btmp := b[l*ldb : l*ldb+n] + for i, v := range a[l*lda : l*lda+m] { + tmp := alpha * v + if tmp != 0 { + ctmp := c[i*ldc : i*ldc+n] + f64.AxpyUnitaryTo(ctmp, tmp, btmp, ctmp) + } + } + } +} + +// dgemmSerial where a is not transposed and b is +func dgemmSerialNotTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // This style is used instead of the literal [i*stride+j] indexing because it is + // approximately 5 times faster as of go 1.3. + for i := 0; i < m; i++ { + atmp := a[i*lda : i*lda+k] + ctmp := c[i*ldc : i*ldc+n] + for j := 0; j < n; j++ { + ctmp[j] += alpha * f64.DotUnitary(atmp, b[j*ldb:j*ldb+k]) + } + } +} + +// dgemmSerial where both are transposed +func dgemmSerialTransTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { + // This style is used instead of the literal [i*stride+j] indexing because it is + // approximately 5 times faster as of go 1.3.
+ for l := 0; l < k; l++ { + for i, v := range a[l*lda : l*lda+m] { + tmp := alpha * v + if tmp != 0 { + ctmp := c[i*ldc : i*ldc+n] + f64.AxpyInc(tmp, b[l:], ctmp, uintptr(n), uintptr(ldb), 1, 0, 0) + } + } + } +} + +func sliceView64(a []float64, lda, i, j, r, c int) []float64 { + return a[i*lda+j : (i+r-1)*lda+j+c] +} + +func checkMatrix64(m, n int, a []float64, lda int) { + if m < 0 { + panic("blas: rows < 0") + } + if n < 0 { + panic("blas: cols < 0") + } + if lda < n { + panic("blas: illegal stride") + } + if len(a) < (m-1)*lda+n { + panic("blas: insufficient matrix slice length") + } +} diff --git a/blas/native/dgemmbench_test.go b/blas/native/dgemmbench_test.go new file mode 100644 index 00000000..3860fbf8 --- /dev/null +++ b/blas/native/dgemmbench_test.go @@ -0,0 +1,51 @@ +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +func BenchmarkDgemmSmSmSm(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Sm, Sm, Sm, NT, NT) +} + +func BenchmarkDgemmMedMedMed(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Med, Med, NT, NT) +} + +func BenchmarkDgemmMedLgMed(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Lg, Med, NT, NT) +} + +func BenchmarkDgemmLgLgLg(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Lg, Lg, Lg, NT, NT) +} + +func BenchmarkDgemmLgSmLg(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Lg, Sm, Lg, NT, NT) +} + +func BenchmarkDgemmLgLgSm(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Lg, Lg, Sm, NT, NT) +} + +func BenchmarkDgemmHgHgSm(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Hg, Hg, Sm, NT, NT) +} + +func BenchmarkDgemmMedMedMedTNT(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Med, Med, T, NT) +} + +func BenchmarkDgemmMedMedMedNTT(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Med, Med, NT, T) +} + +func 
BenchmarkDgemmMedMedMedTT(b *testing.B) { + testblas.DgemmBenchmark(b, impl, Med, Med, Med, T, T) +} diff --git a/blas/native/dgemvbench_test.go b/blas/native/dgemvbench_test.go new file mode 100644 index 00000000..7d8fabb1 --- /dev/null +++ b/blas/native/dgemvbench_test.go @@ -0,0 +1,91 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +func BenchmarkDgemvSmSmNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Sm, Sm, 1, 1) +} + +func BenchmarkDgemvSmSmNoTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Sm, Sm, 2, 3) +} + +func BenchmarkDgemvSmSmTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Sm, Sm, 1, 1) +} + +func BenchmarkDgemvSmSmTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Sm, Sm, 2, 3) +} + +func BenchmarkDgemvMedMedNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Med, Med, 1, 1) +} + +func BenchmarkDgemvMedMedNoTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Med, Med, 2, 3) +} + +func BenchmarkDgemvMedMedTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Med, Med, 1, 1) +} + +func BenchmarkDgemvMedMedTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Med, Med, 2, 3) +} + +func BenchmarkDgemvLgLgNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Lg, Lg, 1, 1) +} + +func BenchmarkDgemvLgLgNoTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Lg, Lg, 2, 3) +} + +func BenchmarkDgemvLgLgTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Lg, Lg, 1, 1) +} + +func BenchmarkDgemvLgLgTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Lg, Lg, 2, 3) +} + +func BenchmarkDgemvLgSmNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Lg, Sm, 1, 1) +} + +func 
BenchmarkDgemvLgSmNoTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Lg, Sm, 2, 3) +} + +func BenchmarkDgemvLgSmTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Lg, Sm, 1, 1) +} + +func BenchmarkDgemvLgSmTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Lg, Sm, 2, 3) +} + +func BenchmarkDgemvSmLgNoTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Sm, Lg, 1, 1) +} + +func BenchmarkDgemvSmLgNoTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, NT, Sm, Lg, 2, 3) +} + +func BenchmarkDgemvSmLgTransInc1(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Sm, Lg, 1, 1) +} + +func BenchmarkDgemvSmLgTransIncN(b *testing.B) { + testblas.DgemvBenchmark(b, impl, T, Sm, Lg, 2, 3) +} diff --git a/blas/native/dgerbench_test.go b/blas/native/dgerbench_test.go new file mode 100644 index 00000000..a28c7fdc --- /dev/null +++ b/blas/native/dgerbench_test.go @@ -0,0 +1,51 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package native + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +func BenchmarkDgerSmSmInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Sm, Sm, 1, 1) +} + +func BenchmarkDgerSmSmIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Sm, Sm, 2, 3) +} + +func BenchmarkDgerMedMedInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Med, Med, 1, 1) +} + +func BenchmarkDgerMedMedIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Med, Med, 2, 3) +} + +func BenchmarkDgerLgLgInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Lg, Lg, 1, 1) +} + +func BenchmarkDgerLgLgIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Lg, Lg, 2, 3) +} + +func BenchmarkDgerLgSmInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Lg, Sm, 1, 1) +} + +func BenchmarkDgerLgSmIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Lg, Sm, 2, 3) +} + +func BenchmarkDgerSmLgInc1(b *testing.B) { + testblas.DgerBenchmark(b, impl, Sm, Lg, 1, 1) +} + +func BenchmarkDgerSmLgIncN(b *testing.B) { + testblas.DgerBenchmark(b, impl, Sm, Lg, 2, 3) +} diff --git a/blas/native/doc.go b/blas/native/doc.go new file mode 100644 index 00000000..cb63fe77 --- /dev/null +++ b/blas/native/doc.go @@ -0,0 +1,88 @@ +// Copyright ©2015 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Ensure changes made to blas/native are reflected in blas/cgo where relevant. + +/* +Package native is a Go implementation of the BLAS API. This implementation +panics when the input arguments are invalid as per the standard, for example +if a vector increment is zero. Please note that the treatment of NaN values +is not specified, and differs among the BLAS implementations. +github.com/gonum/blas/blas64 provides helpful wrapper functions to the BLAS +interface. The rest of this text describes the layout of the data for the input types. 
+ +Please note that in the function documentation, x[i] refers to the i^th element +of the vector, which will be different from the i^th element of the slice if +incX != 1. + +See http://www.netlib.org/lapack/explore-html/d4/de1/_l_i_c_e_n_s_e_source.html +for more license information. + +Vector arguments are effectively strided slices. They have two input arguments, +a number of elements, n, and an increment, incX. The increment specifies the +distance between elements of the vector. The actual Go slice may be longer +than necessary. +The increment may be positive or negative, except in functions with only +a single vector argument where the increment may only be positive. If the increment +is negative, s[0] is the last element in the slice. Note that this is not the same +as counting backward from the end of the slice, as len(s) may be longer than +necessary. So, for example, if n = 5 and incX = 3, the elements of s are + [0 * * 1 * * 2 * * 3 * * 4 * * * ...] +where ∗ elements are never accessed. If incX = -3, the same elements are +accessed, just in reverse order (4, 3, 2, 1, 0). + +Dense matrices are specified by a number of rows, a number of columns, and a stride. +The stride specifies the number of entries in the slice between the first element +of successive rows. The stride must be at least as large as the number of columns +but may be longer. + [a00 ... a0n a0* ... a1stride-1 a21 ... amn am* ... amstride-1] +Thus, dense[i*ld + j] refers to the {i, j}th element of the matrix. + +Symmetric and triangular matrices (non-packed) are stored identically to Dense, +except that only elements in one triangle of the matrix are accessed. + +Packed symmetric and packed triangular matrices are laid out with the entries +condensed such that all of the unreferenced elements are removed. So, the upper triangular +matrix + [ + 1 2 3 + 0 4 5 + 0 0 6 + ] +and the lower-triangular matrix + [ + 1 0 0 + 2 3 0 + 4 5 6 + ] +will both be compacted as [1 2 3 4 5 6]. 
The (i, j) element of the original +dense matrix can be found at element i*n - i*(i+1)/2 + j for upper triangular, +and at element i*(i+1)/2 + j for lower triangular. + +Banded matrices are laid out in a compact format, constructed by removing the +zeros in the rows and aligning the diagonals. For example, the matrix + [ + 1 2 3 0 0 0 + 4 5 6 7 0 0 + 0 8 9 10 11 0 + 0 0 12 13 14 15 + 0 0 0 16 17 18 + 0 0 0 0 19 20 + ] + +implicitly becomes (∗ entries are never accessed) + [ + * 1 2 3 + 4 5 6 7 + 8 9 10 11 + 12 13 14 15 + 16 17 18 * + 19 20 * * + ] +which is given to the BLAS routine as [∗ 1 2 3 4 ...]. + +See http://www.crest.iu.edu/research/mtl/reference/html/banded.html +for more information. +*/ +package native diff --git a/blas/native/dtrmvbench_test.go b/blas/native/dtrmvbench_test.go new file mode 100644 index 00000000..cfb81777 --- /dev/null +++ b/blas/native/dtrmvbench_test.go @@ -0,0 +1,54 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +// +build go1.7 + +package native + +import ( + "strconv" + "testing" + + "github.com/gonum/blas" + "github.com/gonum/blas/testblas" +) + +func BenchmarkDtrmv(b *testing.B) { + for _, n := range []int{testblas.MediumMat, testblas.LargeMat} { + for _, incX := range []int{1, 5} { + for _, uplo := range []blas.Uplo{blas.Upper, blas.Lower} { + for _, trans := range []blas.Transpose{blas.NoTrans, blas.Trans} { + for _, unit := range []blas.Diag{blas.NonUnit, blas.Unit} { + var str string + if n == testblas.MediumMat { + str += "Med" + } else if n == testblas.LargeMat { + str += "Large" + } + str += "_Inc" + strconv.Itoa(incX) + if uplo == blas.Upper { + str += "_UP" + } else { + str += "_LO" + } + if trans == blas.NoTrans { + str += "_NT" + } else { + str += "_TR" + } + if unit == blas.NonUnit { + str += "_NU" + } else { + str += "_UN" + } + lda := n + b.Run(str, func(b *testing.B) { + testblas.DtrmvBenchmark(b, Implementation{}, n, lda, incX, uplo, trans, unit) + }) + } + } + } + } + } +} diff --git a/blas/native/general_double.go b/blas/native/general_double.go new file mode 100644 index 00000000..0fa6cb74 --- /dev/null +++ b/blas/native/general_double.go @@ -0,0 +1,155 @@ +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "errors" + "fmt" + "math" +) + +func newGeneral64(r, c int) general64 { + return general64{ + data: make([]float64, r*c), + rows: r, + cols: c, + stride: c, + } +} + +type general64 struct { + data []float64 + rows, cols int + stride int +} + +// adds element-wise into receiver. 
rows and columns must match +func (g general64) add(h general64) { + if debug { + if g.rows != h.rows { + panic("blas: row size mismatch") + } + if g.cols != h.cols { + panic("blas: col size mismatch") + } + } + for i := 0; i < g.rows; i++ { + gtmp := g.data[i*g.stride : i*g.stride+g.cols] + for j, v := range h.data[i*h.stride : i*h.stride+h.cols] { + gtmp[j] += v + } + } +} + +// at returns the value at the ith row and jth column. For speed reasons, the +// rows and columns are not bounds checked. +func (g general64) at(i, j int) float64 { + if debug { + if i < 0 || i >= g.rows { + panic("blas: row out of bounds") + } + if j < 0 || j >= g.cols { + panic("blas: col out of bounds") + } + } + return g.data[i*g.stride+j] +} + +func (g general64) check(c byte) error { + if g.rows < 0 { + return errors.New("blas: rows < 0") + } + if g.cols < 0 { + return errors.New("blas: cols < 0") + } + if g.stride < 1 { + return errors.New("blas: stride < 1") + } + if g.stride < g.cols { + return errors.New("blas: illegal stride") + } + if (g.rows-1)*g.stride+g.cols > len(g.data) { + return fmt.Errorf("blas: index of %c out of range", c) + } + return nil +} + +func (g general64) clone() general64 { + data := make([]float64, len(g.data)) + copy(data, g.data) + return general64{ + data: data, + rows: g.rows, + cols: g.cols, + stride: g.stride, + } +} + +// assumes they are the same size +func (g general64) copy(h general64) { + if debug { + if g.rows != h.rows { + panic("blas: row mismatch") + } + if g.cols != h.cols { + panic("blas: col mismatch") + } + } + for k := 0; k < g.rows; k++ { + copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride]) + } +} + +func (g general64) equal(a general64) bool { + if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride { + return false + } + for i, v := range g.data { + if a.data[i] != v { + return false + } + } + return true +} + +/* +// print is to aid debugging. 
Commented out to avoid fmt import +func (g general64) print() { + fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride) + for i := 0; i < g.rows; i++ { + fmt.Println(g.data[i*g.stride : (i+1)*g.stride]) + } + +} +*/ + +func (g general64) view(i, j, r, c int) general64 { + if debug { + if i < 0 || i+r > g.rows { + panic("blas: row out of bounds") + } + if j < 0 || j+c > g.cols { + panic("blas: col out of bounds") + } + } + return general64{ + data: g.data[i*g.stride+j : (i+r-1)*g.stride+j+c], + rows: r, + cols: c, + stride: g.stride, + } +} + +func (g general64) equalWithinAbs(a general64, tol float64) bool { + if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride { + return false + } + for i, v := range g.data { + if math.Abs(a.data[i]-v) > tol { + return false + } + } + return true +} diff --git a/blas/native/general_single.go b/blas/native/general_single.go new file mode 100644 index 00000000..de1c263f --- /dev/null +++ b/blas/native/general_single.go @@ -0,0 +1,157 @@ +// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT. + +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "errors" + "fmt" + math "github.com/gonum/blas/native/internal/math32" +) + +func newGeneral32(r, c int) general32 { + return general32{ + data: make([]float32, r*c), + rows: r, + cols: c, + stride: c, + } +} + +type general32 struct { + data []float32 + rows, cols int + stride int +} + +// adds element-wise into receiver. 
rows and columns must match +func (g general32) add(h general32) { + if debug { + if g.rows != h.rows { + panic("blas: row size mismatch") + } + if g.cols != h.cols { + panic("blas: col size mismatch") + } + } + for i := 0; i < g.rows; i++ { + gtmp := g.data[i*g.stride : i*g.stride+g.cols] + for j, v := range h.data[i*h.stride : i*h.stride+h.cols] { + gtmp[j] += v + } + } +} + +// at returns the value at the ith row and jth column. For speed reasons, the +// rows and columns are not bounds checked. +func (g general32) at(i, j int) float32 { + if debug { + if i < 0 || i >= g.rows { + panic("blas: row out of bounds") + } + if j < 0 || j >= g.cols { + panic("blas: col out of bounds") + } + } + return g.data[i*g.stride+j] +} + +func (g general32) check(c byte) error { + if g.rows < 0 { + return errors.New("blas: rows < 0") + } + if g.cols < 0 { + return errors.New("blas: cols < 0") + } + if g.stride < 1 { + return errors.New("blas: stride < 1") + } + if g.stride < g.cols { + return errors.New("blas: illegal stride") + } + if (g.rows-1)*g.stride+g.cols > len(g.data) { + return fmt.Errorf("blas: index of %c out of range", c) + } + return nil +} + +func (g general32) clone() general32 { + data := make([]float32, len(g.data)) + copy(data, g.data) + return general32{ + data: data, + rows: g.rows, + cols: g.cols, + stride: g.stride, + } +} + +// assumes they are the same size +func (g general32) copy(h general32) { + if debug { + if g.rows != h.rows { + panic("blas: row mismatch") + } + if g.cols != h.cols { + panic("blas: col mismatch") + } + } + for k := 0; k < g.rows; k++ { + copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride]) + } +} + +func (g general32) equal(a general32) bool { + if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride { + return false + } + for i, v := range g.data { + if a.data[i] != v { + return false + } + } + return true +} + +/* +// print is to aid debugging. 
Commented out to avoid fmt import +func (g general32) print() { + fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride) + for i := 0; i < g.rows; i++ { + fmt.Println(g.data[i*g.stride : (i+1)*g.stride]) + } + +} +*/ + +func (g general32) view(i, j, r, c int) general32 { + if debug { + if i < 0 || i+r > g.rows { + panic("blas: row out of bounds") + } + if j < 0 || j+c > g.cols { + panic("blas: col out of bounds") + } + } + return general32{ + data: g.data[i*g.stride+j : (i+r-1)*g.stride+j+c], + rows: r, + cols: c, + stride: g.stride, + } +} + +func (g general32) equalWithinAbs(a general32, tol float32) bool { + if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride { + return false + } + for i, v := range g.data { + if math.Abs(a.data[i]-v) > tol { + return false + } + } + return true +} diff --git a/blas/native/internal/math32/math.go b/blas/native/internal/math32/math.go new file mode 100644 index 00000000..b33401b9 --- /dev/null +++ b/blas/native/internal/math32/math.go @@ -0,0 +1,113 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package math32 provides float32 versions of standard library math package +// routines used by gonum/blas/native. +package math32 + +import ( + "math" +) + +const ( + unan = 0x7fc00000 + uinf = 0x7f800000 + uneginf = 0xff800000 + mask = 0x7f8 >> 3 + shift = 32 - 8 - 1 + bias = 127 +) + +// Abs returns the absolute value of x. +// +// Special cases are: +// Abs(±Inf) = +Inf +// Abs(NaN) = NaN +func Abs(x float32) float32 { + switch { + case x < 0: + return -x + case x == 0: + return 0 // return correctly abs(-0) + } + return x +} + +// Copysign returns a value with the magnitude +// of x and the sign of y. 
+func Copysign(x, y float32) float32 { + const sign = 1 << 31 + return math.Float32frombits(math.Float32bits(x)&^sign | math.Float32bits(y)&sign) +} + +// Hypot returns Sqrt(p*p + q*q), taking care to avoid +// unnecessary overflow and underflow. +// +// Special cases are: +// Hypot(±Inf, q) = +Inf +// Hypot(p, ±Inf) = +Inf +// Hypot(NaN, q) = NaN +// Hypot(p, NaN) = NaN +func Hypot(p, q float32) float32 { + // special cases + switch { + case IsInf(p, 0) || IsInf(q, 0): + return Inf(1) + case IsNaN(p) || IsNaN(q): + return NaN() + } + if p < 0 { + p = -p + } + if q < 0 { + q = -q + } + if p < q { + p, q = q, p + } + if p == 0 { + return 0 + } + q = q / p + return p * Sqrt(1+q*q) +} + +// Inf returns positive infinity if sign >= 0, negative infinity if sign < 0. +func Inf(sign int) float32 { + var v uint32 + if sign >= 0 { + v = uinf + } else { + v = uneginf + } + return math.Float32frombits(v) +} + +// IsInf reports whether f is an infinity, according to sign. +// If sign > 0, IsInf reports whether f is positive infinity. +// If sign < 0, IsInf reports whether f is negative infinity. +// If sign == 0, IsInf reports whether f is either infinity. +func IsInf(f float32, sign int) bool { + // Test for infinity by comparing against maximum float. + // To avoid the floating-point hardware, could use: + // x := math.Float32bits(f); + // return sign >= 0 && x == uinf || sign <= 0 && x == uneginf; + return sign >= 0 && f > math.MaxFloat32 || sign <= 0 && f < -math.MaxFloat32 +} + +// IsNaN reports whether f is an IEEE 754 ``not-a-number'' value. +func IsNaN(f float32) (is bool) { + // IEEE 754 says that only NaNs satisfy f != f. + // To avoid the floating-point hardware, could use: + // x := math.Float32bits(f); + // return uint32(x>>shift)&mask == mask && x != uinf && x != uneginf + return f != f +} + +// NaN returns an IEEE 754 ``not-a-number'' value. 
+func NaN() float32 { return math.Float32frombits(unan) } diff --git a/blas/native/internal/math32/math_test.go b/blas/native/internal/math32/math_test.go new file mode 100644 index 00000000..f0689274 --- /dev/null +++ b/blas/native/internal/math32/math_test.go @@ -0,0 +1,226 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math32 + +import ( + "math" + "testing" + "testing/quick" + + "github.com/gonum/floats" +) + +const tol = 1e-7 + +func TestAbs(t *testing.T) { + f := func(x float32) bool { + y := Abs(x) + return y == float32(math.Abs(float64(x))) + } + if err := quick.Check(f, nil); err != nil { + t.Error(err) + } +} + +func TestCopySign(t *testing.T) { + f := func(x struct{ X, Y float32 }) bool { + y := Copysign(x.X, x.Y) + return y == float32(math.Copysign(float64(x.X), float64(x.Y))) + } + if err := quick.Check(f, nil); err != nil { + t.Error(err) + } +} + +func TestHypot(t *testing.T) { + f := func(x struct{ X, Y float32 }) bool { + y := Hypot(x.X, x.Y) + if math.Hypot(float64(x.X), float64(x.Y)) > math.MaxFloat32 { + return true + } + return floats.EqualWithinRel(float64(y), math.Hypot(float64(x.X), float64(x.Y)), tol) + } + if err := quick.Check(f, nil); err != nil { + t.Error(err) + } +} + +func TestInf(t *testing.T) { + if float64(Inf(1)) != math.Inf(1) || float64(Inf(-1)) != math.Inf(-1) { + t.Error("float32(inf) not infinite") + } +} + +func TestIsInf(t *testing.T) { + posInf := float32(math.Inf(1)) + negInf := float32(math.Inf(-1)) + if !IsInf(posInf, 0) || !IsInf(negInf, 0) || !IsInf(posInf, 1) || !IsInf(negInf, -1) || IsInf(posInf, -1) || IsInf(negInf, 1) { + t.Error("unexpected isInf value") + } + f := func(x struct { + F float32 + Sign int + }) bool { + y := IsInf(x.F, x.Sign) + return y == math.IsInf(float64(x.F), x.Sign) + } + if err := quick.Check(f, nil); err != nil { + t.Error(err) + } +} + +func TestIsNaN(t 
*testing.T) { + f := func(x float32) bool { + y := IsNaN(x) + return y == math.IsNaN(float64(x)) + } + if err := quick.Check(f, nil); err != nil { + t.Error(err) + } +} + +func TestNaN(t *testing.T) { + if !math.IsNaN(float64(NaN())) { + t.Errorf("float32(nan) is a number: %f", NaN()) + } +} + +func TestSqrt(t *testing.T) { + f := func(x float32) bool { + y := Sqrt(x) + if IsNaN(y) && IsNaN(sqrt(x)) { + return true + } + return floats.EqualWithinRel(float64(y), float64(sqrt(x)), tol) + } + if err := quick.Check(f, nil); err != nil { + t.Error(err) + } +} + +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and +// came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_sqrt(x) +// Return correctly rounded sqrt. +// ----------------------------------------- +// | Use the hardware sqrt if you have one | +// ----------------------------------------- +// Method: +// Bit by bit method using integer arithmetic. (Slow, but portable) +// 1. Normalization +// Scale x to y in [1,4) with even powers of 2: +// find an integer k such that 1 <= (y=x*2**(2k)) < 4, then +// sqrt(x) = 2**k * sqrt(y) +// 2. Bit by bit computation +// Let q = sqrt(y) truncated to i bit after binary point (q = 1), +// i 0 +// i+1 2 +// s = 2*q , and y = 2 * ( y - q ). 
(1) +// i i i i +// +// To compute q from q , one checks whether +// i+1 i +// +// -(i+1) 2 +// (q + 2 ) <= y. (2) +// i +// -(i+1) +// If (2) is false, then q = q ; otherwise q = q + 2 . +// i+1 i i+1 i +// +// With some algebraic manipulation, it is not difficult to see +// that (2) is equivalent to +// -(i+1) +// s + 2 <= y (3) +// i i +// +// The advantage of (3) is that s and y can be computed by +// i i +// the following recurrence formula: +// if (3) is false +// +// s = s , y = y ; (4) +// i+1 i i+1 i +// +// otherwise, +// -i -(i+1) +// s = s + 2 , y = y - s - 2 (5) +// i+1 i i+1 i i +// +// One may easily use induction to prove (4) and (5). +// Note. Since the left hand side of (3) contain only i+2 bits, +// it does not necessary to do a full (53-bit) comparison +// in (3). +// 3. Final rounding +// After generating the 53 bits result, we compute one more bit. +// Together with the remainder, we can decide whether the +// result is exact, bigger than 1/2ulp, or less than 1/2ulp +// (it will never equal to 1/2ulp). +// The rounding mode can be detected by checking whether +// huge + tiny is equal to huge, and whether huge - tiny is +// equal to huge for some floating point number "huge" and "tiny". 
+// +func sqrt(x float32) float32 { + // special cases + switch { + case x == 0 || IsNaN(x) || IsInf(x, 1): + return x + case x < 0: + return NaN() + } + ix := math.Float32bits(x) + // normalize x + exp := int((ix >> shift) & mask) + if exp == 0 { // subnormal x + for ix&1<>= 1 // exp = exp/2, exponent of square root + // generate sqrt(x) bit by bit + ix <<= 1 + var q, s uint32 // q = sqrt(x) + r := uint32(1 << (shift + 1)) // r = moving bit from MSB to LSB + for r != 0 { + t := s + r + if t <= ix { + s = t + r + ix -= t + q += r + } + ix <<= 1 + r >>= 1 + } + // final rounding + if ix != 0 { // remainder, result not exact + q += q & 1 // round according to extra bit + } + ix = q>>1 + uint32(exp-1+bias)< 0 && (n-1)*incX >= len(x) { + panic(badX) + } + if n < 2 { + if n == 1 { + return math.Abs(x[0]) + } + if n == 0 { + return 0 + } + if n < 1 { + panic(negativeN) + } + } + var ( + scale float64 = 0 + sumSquares float64 = 1 + ) + if incX == 1 { + x = x[:n] + for _, v := range x { + if v == 0 { + continue + } + absxi := math.Abs(v) + if math.IsNaN(absxi) { + return math.NaN() + } + if scale < absxi { + sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi) + scale = absxi + } else { + sumSquares = sumSquares + (absxi/scale)*(absxi/scale) + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(sumSquares) + } + for ix := 0; ix < n*incX; ix += incX { + val := x[ix] + if val == 0 { + continue + } + absxi := math.Abs(val) + if math.IsNaN(absxi) { + return math.NaN() + } + if scale < absxi { + sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi) + scale = absxi + } else { + sumSquares = sumSquares + (absxi/scale)*(absxi/scale) + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(sumSquares) +} + +// Dasum computes the sum of the absolute values of the elements of x. +// \sum_i |x[i]| +// Dasum returns 0 if incX is negative. 
+func (Implementation) Dasum(n int, x []float64, incX int) float64 { + var sum float64 + if n < 0 { + panic(negativeN) + } + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic(badX) + } + if incX == 1 { + x = x[:n] + for _, v := range x { + sum += math.Abs(v) + } + return sum + } + for i := 0; i < n; i++ { + sum += math.Abs(x[i*incX]) + } + return sum +} + +// Idamax returns the index of an element of x with the largest absolute value. +// If there are multiple such indices the earliest is returned. +// Idamax returns -1 if n == 0. +func (Implementation) Idamax(n int, x []float64, incX int) int { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return -1 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic(badX) + } + if n < 2 { + if n == 1 { + return 0 + } + if n == 0 { + return -1 // Netlib returns invalid index when n == 0 + } + if n < 1 { + panic(negativeN) + } + } + idx := 0 + max := math.Abs(x[0]) + if incX == 1 { + for i, v := range x[:n] { + absV := math.Abs(v) + if absV > max { + max = absV + idx = i + } + } + return idx + } + ix := incX + for i := 1; i < n; i++ { + v := x[ix] + absV := math.Abs(v) + if absV > max { + max = absV + idx = i + } + ix += incX + } + return idx +} + +// Dswap exchanges the elements of two vectors. 
+// x[i], y[i] = y[i], x[i] for all i +func (Implementation) Dswap(n int, x []float64, incX int, y []float64, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, v := range x { + x[i], y[i] = y[i], v + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + x[ix], y[iy] = y[iy], x[ix] + ix += incX + iy += incY + } +} + +// Dcopy copies the elements of x into the elements of y. +// y[i] = x[i] for all i +func (Implementation) Dcopy(n int, x []float64, incX int, y []float64, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if incX == 1 && incY == 1 { + copy(y[:n], x[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + y[iy] = x[ix] + ix += incX + iy += incY + } +} + +// Daxpy adds alpha times x to y +// y[i] += alpha * x[i] for all i +func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + 
panic(badY) + } + if alpha == 0 { + return + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(badLenX) + } + if len(y) < n { + panic(badLenY) + } + f64.AxpyUnitaryTo(y, alpha, x[:n], y) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(badLenX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(badLenY) + } + f64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Drotg computes the plane rotation +// _ _ _ _ _ _ +// | c s | | a | | r | +// | -s c | * | b | = | 0 | +// ‾ ‾ ‾ ‾ ‾ ‾ +// where +// r = ±√(a^2 + b^2) +// c = a/r, the cosine of the plane rotation +// s = b/r, the sine of the plane rotation +// +// NOTE: There is a discrepancy between the refence implementation and the BLAS +// technical manual regarding the sign for r when a or b are zero. +// Drotg agrees with the definition in the manual and other +// common BLAS implementations. +func (Implementation) Drotg(a, b float64) (c, s, r, z float64) { + if b == 0 && a == 0 { + return 1, 0, a, 0 + } + absA := math.Abs(a) + absB := math.Abs(b) + aGTb := absA > absB + r = math.Hypot(a, b) + if aGTb { + r = math.Copysign(r, a) + } else { + r = math.Copysign(r, b) + } + c = a / r + s = b / r + if aGTb { + z = s + } else if c != 0 { // r == 0 case handled above + z = 1 / c + } else { + z = 1 + } + return +} + +// Drotmg computes the modified Givens rotation. See +// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html +// for more details. 
+func (Implementation) Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) { + var p1, p2, q1, q2, u float64 + + const ( + gam = 4096.0 + gamsq = 16777216.0 + rgamsq = 5.9604645e-8 + ) + + if d1 < 0 { + p.Flag = blas.Rescaling + return + } + + p2 = d2 * y1 + if p2 == 0 { + p.Flag = blas.Identity + rd1 = d1 + rd2 = d2 + rx1 = x1 + return + } + p1 = d1 * x1 + q2 = p2 * y1 + q1 = p1 * x1 + + absQ1 := math.Abs(q1) + absQ2 := math.Abs(q2) + + if absQ1 < absQ2 && q2 < 0 { + p.Flag = blas.Rescaling + return + } + + if d1 == 0 { + p.Flag = blas.Diagonal + p.H[0] = p1 / p2 + p.H[3] = x1 / y1 + u = 1 + p.H[0]*p.H[3] + rd1, rd2 = d2/u, d1/u + rx1 = y1 / u + return + } + + // Now we know that d1 != 0, and d2 != 0. If d2 == 0, it would be caught + // when p2 == 0, and if d1 == 0, then it is caught above + + if absQ1 > absQ2 { + p.H[1] = -y1 / x1 + p.H[2] = p2 / p1 + u = 1 - p.H[2]*p.H[1] + rd1 = d1 + rd2 = d2 + rx1 = x1 + p.Flag = blas.OffDiagonal + // u must be greater than zero because |q1| > |q2|, so check from netlib + // is unnecessary + // This is left in for ease of comparison with complex routines + //if u > 0 { + rd1 /= u + rd2 /= u + rx1 *= u + //} + } else { + p.Flag = blas.Diagonal + p.H[0] = p1 / p2 + p.H[3] = x1 / y1 + u = 1 + p.H[0]*p.H[3] + rd1 = d2 / u + rd2 = d1 / u + rx1 = y1 * u + } + + for rd1 <= rgamsq || rd1 >= gamsq { + if p.Flag == blas.OffDiagonal { + p.H[0] = 1 + p.H[3] = 1 + p.Flag = blas.Rescaling + } else if p.Flag == blas.Diagonal { + p.H[1] = -1 + p.H[2] = 1 + p.Flag = blas.Rescaling + } + if rd1 <= rgamsq { + rd1 *= gam * gam + rx1 /= gam + p.H[0] /= gam + p.H[2] /= gam + } else { + rd1 /= gam * gam + rx1 *= gam + p.H[0] *= gam + p.H[2] *= gam + } + } + + for math.Abs(rd2) <= rgamsq || math.Abs(rd2) >= gamsq { + if p.Flag == blas.OffDiagonal { + p.H[0] = 1 + p.H[3] = 1 + p.Flag = blas.Rescaling + } else if p.Flag == blas.Diagonal { + p.H[1] = -1 + p.H[2] = 1 + p.Flag = blas.Rescaling + } + if math.Abs(rd2) <= rgamsq { + 
rd2 *= gam * gam + p.H[1] /= gam + p.H[3] /= gam + } else { + rd2 /= gam * gam + p.H[1] *= gam + p.H[3] *= gam + } + } + return +} + +// Drot applies a plane transformation. +// x[i] = c * x[i] + s * y[i] +// y[i] = c * y[i] - s * x[i] +func (Implementation) Drot(n int, x []float64, incX int, y []float64, incY int, c float64, s float64) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = c*vx+s*vy, c*vy-s*vx + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = c*vx+s*vy, c*vy-s*vx + ix += incX + iy += incY + } +} + +// Drotm applies the modified Givens rotation to the 2×n matrix. 
+func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + + var h11, h12, h21, h22 float64 + var ix, iy int + switch p.Flag { + case blas.Identity: + return + case blas.Rescaling: + h11 = p.H[0] + h12 = p.H[2] + h21 = p.H[1] + h22 = p.H[3] + case blas.OffDiagonal: + h11 = 1 + h12 = p.H[2] + h21 = p.H[1] + h22 = 1 + case blas.Diagonal: + h11 = p.H[0] + h12 = 1 + h21 = -1 + h22 = p.H[3] + } + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = vx*h11+vy*h12, vx*h21+vy*h22 + } + return + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = vx*h11+vy*h12, vx*h21+vy*h22 + ix += incX + iy += incY + } + return +} + +// Dscal scales x by alpha. +// x[i] *= alpha +// Dscal has no effect if incX < 0. 
+func (Implementation) Dscal(n int, alpha float64, x []float64, incX int) { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return + } + if (n-1)*incX >= len(x) { + panic(badX) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if alpha == 0 { + if incX == 1 { + x = x[:n] + for i := range x { + x[i] = 0 + } + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] = 0 + } + return + } + if incX == 1 { + f64.ScalUnitary(alpha, x[:n]) + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] *= alpha + } +} diff --git a/blas/native/level1doubleBench_auto_test.go b/blas/native/level1doubleBench_auto_test.go new file mode 100644 index 00000000..cb46c75b --- /dev/null +++ b/blas/native/level1doubleBench_auto_test.go @@ -0,0 +1,1685 @@ +// Code generated by "go run github.com/gonum/blas/testblas/benchautogen/autogen_bench_level1double.go"; DO NOT EDIT. + +// Copyright 2014 The Gonum Authors. All rights reserved. +// Use of this code is governed by a BSD-style +// license that can be found in the LICENSE file + +package native + +import ( + "math/rand" + "testing" + + "github.com/gonum/blas" +) + +const ( + posInc1 = 5 + posInc2 = 3 + negInc1 = -3 + negInc2 = -4 + SMALL_SLICE = 10 + MEDIUM_SLICE = 1000 + LARGE_SLICE = 100000 + HUGE_SLICE = 10000000 +) + +func randomSlice(l, idx int) []float64 { + if idx < 0 { + idx = -idx + } + s := make([]float64, l*idx) + for i := range s { + s[i] = rand.Float64() + } + return s +} + +func benchmarkDdot(b *testing.B, n int, x []float64, incX int, y []float64, incY int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Ddot(n, x, incX, y, incY) + } +} + +func BenchmarkDdotSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + 
+ benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := 
randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +func BenchmarkDdotHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDdot(b, n, x, incX, y, incY) +} + +/* ------------------ */ +func benchmarkDnrm2(b *testing.B, n int, x []float64, incX int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dnrm2(n, x, incX) + } +} + +func BenchmarkDnrm2SmallUnitaryInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2SmallPosInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2MediumUnitaryInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2MediumPosInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2LargeUnitaryInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2LargePosInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2HugeUnitaryInc(b *testing.B) { + n 
:= HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +func BenchmarkDnrm2HugePosInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDnrm2(b, n, x, incX) +} + +/* ------------------ */ +func benchmarkDasum(b *testing.B, n int, x []float64, incX int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dasum(n, x, incX) + } +} + +func BenchmarkDasumSmallUnitaryInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumSmallPosInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumMediumUnitaryInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumMediumPosInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumLargeUnitaryInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumLargePosInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumHugeUnitaryInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +func BenchmarkDasumHugePosInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkDasum(b, n, x, incX) +} + +/* ------------------ */ +func benchmarkIdamax(b *testing.B, n int, x []float64, incX int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Idamax(n, x, incX) + } +} + +func BenchmarkIdamaxSmallUnitaryInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxSmallPosInc(b *testing.B) { 
+ n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxMediumUnitaryInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxMediumPosInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxLargeUnitaryInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxLargePosInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxHugeUnitaryInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +func BenchmarkIdamaxHugePosInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + + benchmarkIdamax(b, n, x, incX) +} + +/* ------------------ */ +func benchmarkDswap(b *testing.B, n int, x []float64, incX int, y []float64, incY int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dswap(n, x, incX, y, incY) + } +} + +func BenchmarkDswapSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + 
benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := 
randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +func BenchmarkDswapHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDswap(b, n, x, incX, y, incY) +} + +/* ------------------ */ +func benchmarkDcopy(b *testing.B, n int, x []float64, incX int, y []float64, incY int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dcopy(n, x, incX, y, incY) + } +} + +func BenchmarkDcopySmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopySmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopySmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopySmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, 
incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +func BenchmarkDcopyHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + + benchmarkDcopy(b, n, x, incX, y, incY) +} + +/* ------------------ */ +func benchmarkDaxpy(b *testing.B, n int, alpha 
float64, x []float64, incX int, y []float64, incY int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Daxpy(n, alpha, x, incX, y, incY) + } +} + +func BenchmarkDaxpySmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpySmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpySmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpySmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyLargeBothUnitary(b *testing.B) { + n 
:= LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +func BenchmarkDaxpyHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + alpha := 2.4 + benchmarkDaxpy(b, n, alpha, x, incX, y, incY) +} + +/* ------------------ */ +func benchmarkDrot(b *testing.B, n int, x []float64, incX int, y []float64, incY int, c, s float64) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Drot(n, x, incX, y, incY, c, s) + } +} + +func BenchmarkDrotSmallBothUnitary(b *testing.B) { + n := 
SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX 
:= 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +func BenchmarkDrotHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + c := 0.89725836967 + s := 0.44150585279 + benchmarkDrot(b, n, x, incX, y, incY, c, s) +} + +/* ------------------ */ +func benchmarkDrotmOffDia(b *testing.B, n int, x []float64, incX int, y []float64, 
incY int, p blas.DrotmParams) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Drotm(n, x, incX, y, incY, p) + } +} + +func BenchmarkDrotmOffDiaSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaMediumUniInc(b *testing.B) { + n := 
MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + 
benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmOffDiaHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375, 0}} + benchmarkDrotmOffDia(b, n, x, incX, y, incY, p) +} + +/* ------------------ */ +func benchmarkDrotmDia(b *testing.B, n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Drotm(n, x, incX, y, incY, p) + } +} + +func BenchmarkDrotmDiaSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 
0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaMediumUniInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, 
incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmDiaHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}} + benchmarkDrotmDia(b, n, x, incX, y, incY, p) +} + +/* ------------------ */ +func benchmarkDrotmResc(b *testing.B, n int, x []float64, 
incX int, y []float64, incY int, p blas.DrotmParams) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Drotm(n, x, incX, y, incY, p) + } +} + +func BenchmarkDrotmRescSmallBothUnitary(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescSmallIncUni(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescSmallUniInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescSmallBothInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescMediumBothUnitary(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescMediumIncUni(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescMediumUniInc(b *testing.B) { + 
n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescMediumBothInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescLargeBothUnitary(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescLargeIncUni(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescLargeUniInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescLargeBothInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescHugeBothUnitary(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + 
benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescHugeIncUni(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := 1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescHugeUniInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +func BenchmarkDrotmRescHugeBothInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + incY := negInc1 + y := randomSlice(n, incY) + p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}} + benchmarkDrotmResc(b, n, x, incX, y, incY, p) +} + +/* ------------------ */ +func benchmarkDscal(b *testing.B, n int, alpha float64, x []float64, incX int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + impl.Dscal(n, alpha, x, incX) + } +} + +func BenchmarkDscalSmallUnitaryInc(b *testing.B) { + n := SMALL_SLICE + incX := 1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalSmallPosInc(b *testing.B) { + n := SMALL_SLICE + incX := posInc1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalMediumUnitaryInc(b *testing.B) { + n := MEDIUM_SLICE + incX := 1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalMediumPosInc(b *testing.B) { + n := MEDIUM_SLICE + incX := posInc1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalLargeUnitaryInc(b *testing.B) { + n := LARGE_SLICE + incX := 1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + 
+func BenchmarkDscalLargePosInc(b *testing.B) { + n := LARGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalHugeUnitaryInc(b *testing.B) { + n := HUGE_SLICE + incX := 1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +func BenchmarkDscalHugePosInc(b *testing.B) { + n := HUGE_SLICE + incX := posInc1 + x := randomSlice(n, incX) + alpha := 2.4 + benchmarkDscal(b, n, alpha, x, incX) +} + +/* ------------------ */ diff --git a/blas/native/level1double_ddot.go b/blas/native/level1double_ddot.go new file mode 100644 index 00000000..03e66a0a --- /dev/null +++ b/blas/native/level1double_ddot.go @@ -0,0 +1,49 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "github.com/gonum/internal/asm/f64" +) + +// Ddot computes the dot product of the two vectors +// \sum_i x[i]*y[i] +func (Implementation) Ddot(n int, x []float64, incX int, y []float64, incY int) float64 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(negativeN) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(badLenX) + } + if len(y) < n { + panic(badLenY) + } + return f64.DotUnitary(x[:n], y) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(badLenX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(badLenY) + } + return f64.DotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} diff --git a/blas/native/level1double_test.go b/blas/native/level1double_test.go new file mode 100644 index 00000000..6f222bf6 --- /dev/null +++ b/blas/native/level1double_test.go @@ -0,0 +1,61 @@ +// Copyright ©2014 The gonum Authors. 
// impl is the zero-value native Implementation that every test in this file
// hands to the shared testblas suites.
var impl Implementation

// TestDasum runs the shared testblas Dasum suite against impl.
func TestDasum(t *testing.T) {
	testblas.DasumTest(t, impl)
}

// TestDaxpy runs the shared testblas Daxpy suite against impl.
func TestDaxpy(t *testing.T) {
	testblas.DaxpyTest(t, impl)
}

// TestDdot runs the shared testblas Ddot suite against impl.
func TestDdot(t *testing.T) {
	testblas.DdotTest(t, impl)
}

// TestDnrm2 runs the shared testblas Dnrm2 suite against impl.
func TestDnrm2(t *testing.T) {
	testblas.Dnrm2Test(t, impl)
}

// TestIdamax runs the shared testblas Idamax suite against impl.
func TestIdamax(t *testing.T) {
	testblas.IdamaxTest(t, impl)
}

// TestDswap runs the shared testblas Dswap suite against impl.
func TestDswap(t *testing.T) {
	testblas.DswapTest(t, impl)
}

// TestDcopy runs the shared testblas Dcopy suite against impl.
func TestDcopy(t *testing.T) {
	testblas.DcopyTest(t, impl)
}

// TestDrotg runs the shared testblas Drotg suite against impl.
func TestDrotg(t *testing.T) {
	testblas.DrotgTest(t, impl)
}

// TestDrotmg runs the shared testblas Drotmg suite against impl.
func TestDrotmg(t *testing.T) {
	testblas.DrotmgTest(t, impl)
}

// TestDrot runs the shared testblas Drot suite against impl.
func TestDrot(t *testing.T) {
	testblas.DrotTest(t, impl)
}

// TestDrotm runs the shared testblas Drotm suite against impl.
func TestDrotm(t *testing.T) {
	testblas.DrotmTest(t, impl)
}

// TestDscal runs the shared testblas Dscal suite against impl.
func TestDscal(t *testing.T) {
	testblas.DscalTest(t, impl)
}
+func (Implementation) Snrm2(n int, x []float32, incX int) float32 { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic(badX) + } + if n < 2 { + if n == 1 { + return math.Abs(x[0]) + } + if n == 0 { + return 0 + } + if n < 1 { + panic(negativeN) + } + } + var ( + scale float32 = 0 + sumSquares float32 = 1 + ) + if incX == 1 { + x = x[:n] + for _, v := range x { + if v == 0 { + continue + } + absxi := math.Abs(v) + if math.IsNaN(absxi) { + return math.NaN() + } + if scale < absxi { + sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi) + scale = absxi + } else { + sumSquares = sumSquares + (absxi/scale)*(absxi/scale) + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(sumSquares) + } + for ix := 0; ix < n*incX; ix += incX { + val := x[ix] + if val == 0 { + continue + } + absxi := math.Abs(val) + if math.IsNaN(absxi) { + return math.NaN() + } + if scale < absxi { + sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi) + scale = absxi + } else { + sumSquares = sumSquares + (absxi/scale)*(absxi/scale) + } + } + if math.IsInf(scale, 1) { + return math.Inf(1) + } + return scale * math.Sqrt(sumSquares) +} + +// Sasum computes the sum of the absolute values of the elements of x. +// \sum_i |x[i]| +// Sasum returns 0 if incX is negative. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sasum(n int, x []float32, incX int) float32 { + var sum float32 + if n < 0 { + panic(negativeN) + } + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return 0 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic(badX) + } + if incX == 1 { + x = x[:n] + for _, v := range x { + sum += math.Abs(v) + } + return sum + } + for i := 0; i < n; i++ { + sum += math.Abs(x[i*incX]) + } + return sum +} + +// Isamax returns the index of an element of x with the largest absolute value. 
+// If there are multiple such indices the earliest is returned. +// Isamax returns -1 if n == 0. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Isamax(n int, x []float32, incX int) int { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return -1 + } + if incX > 0 && (n-1)*incX >= len(x) { + panic(badX) + } + if n < 2 { + if n == 1 { + return 0 + } + if n == 0 { + return -1 // Netlib returns invalid index when n == 0 + } + if n < 1 { + panic(negativeN) + } + } + idx := 0 + max := math.Abs(x[0]) + if incX == 1 { + for i, v := range x[:n] { + absV := math.Abs(v) + if absV > max { + max = absV + idx = i + } + } + return idx + } + ix := incX + for i := 1; i < n; i++ { + v := x[ix] + absV := math.Abs(v) + if absV > max { + max = absV + idx = i + } + ix += incX + } + return idx +} + +// Sswap exchanges the elements of two vectors. +// x[i], y[i] = y[i], x[i] for all i +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sswap(n int, x []float32, incX int, y []float32, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, v := range x { + x[i], y[i] = y[i], v + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + x[ix], y[iy] = y[iy], x[ix] + ix += incX + iy += incY + } +} + +// Scopy copies the elements of x into the elements of y. +// y[i] = x[i] for all i +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Scopy(n int, x []float32, incX int, y []float32, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if incX == 1 && incY == 1 { + copy(y[:n], x[:n]) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + y[iy] = x[ix] + ix += incX + iy += incY + } +} + +// Saxpy adds alpha times x to y +// y[i] += alpha * x[i] for all i +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if alpha == 0 { + return + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(badLenX) + } + if len(y) < n { + panic(badLenY) + } + f32.AxpyUnitaryTo(y, alpha, x[:n], y) + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(badLenX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(badLenY) + } + f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} + +// Srotg computes the plane rotation +// _ _ _ _ _ _ +// | c s | | a | | r | +// | -s c | * | b | = | 0 | +// ‾ ‾ ‾ ‾ ‾ ‾ +// where +// r = ±√(a^2 + b^2) +// c = a/r, the cosine of the plane rotation +// s = b/r, the sine 
// Srotg computes the plane rotation
//   _    _      _ _       _ _
//  |  c s |    | a |     | r |
//  | -s c |  * | b |   = | 0 |
//   ‾    ‾      ‾ ‾       ‾ ‾
// where
//  r = ±√(a^2 + b^2)
//  c = a/r, the cosine of the plane rotation
//  s = b/r, the sine of the plane rotation
//
// The sign of r follows whichever of a and b has the larger magnitude (b on
// ties). The fourth result z is s when |a| > |b|, otherwise 1/c when c != 0,
// otherwise 1. When a and b are both zero the results are c=1, s=0, r=a, z=0.
//
// NOTE: There is a discrepancy between the reference implementation and the BLAS
// technical manual regarding the sign for r when a or b are zero.
// Srotg agrees with the definition in the manual and other
// common BLAS implementations.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srotg(a, b float32) (c, s, r, z float32) {
	if b == 0 && a == 0 {
		return 1, 0, a, 0
	}
	absA := math.Abs(a)
	absB := math.Abs(b)
	aGTb := absA > absB
	r = math.Hypot(a, b)
	// Give r the sign of the dominant input.
	if aGTb {
		r = math.Copysign(r, a)
	} else {
		r = math.Copysign(r, b)
	}
	c = a / r
	s = b / r
	if aGTb {
		z = s
	} else if c != 0 { // r == 0 case handled above
		z = 1 / c
	} else {
		z = 1
	}
	return
}

// Srotmg computes the modified Givens rotation. See
// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html
// for more details.
//
// The rotation is returned in p (a flag plus the four H entries) together
// with the updated scale factors rd1, rd2 and the updated first component
// rx1. When d1 < 0, or when |q1| < |q2| with q2 < 0, the function returns
// early with p.Flag set to blas.Rescaling and every other result zero. When
// d2*y1 == 0 it returns blas.Identity with d1, d2 and x1 passed through.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srotmg(d1, d2, x1, y1 float32) (p blas.SrotmParams, rd1, rd2, rx1 float32) {
	var p1, p2, q1, q2, u float32

	// gam is the rescaling step, gamsq is gam squared and rgamsq is its
	// reciprocal; the loops below keep rd1 and |rd2| strictly between rgamsq
	// and gamsq.
	const (
		gam    = 4096.0
		gamsq  = 16777216.0
		rgamsq = 5.9604645e-8
	)

	if d1 < 0 {
		p.Flag = blas.Rescaling
		return
	}

	p2 = d2 * y1
	if p2 == 0 {
		p.Flag = blas.Identity
		rd1 = d1
		rd2 = d2
		rx1 = x1
		return
	}
	p1 = d1 * x1
	q2 = p2 * y1
	q1 = p1 * x1

	absQ1 := math.Abs(q1)
	absQ2 := math.Abs(q2)

	if absQ1 < absQ2 && q2 < 0 {
		p.Flag = blas.Rescaling
		return
	}

	if d1 == 0 {
		// p1 = d1*x1 is zero here for finite x1, so H[0] is zero and u
		// evaluates to 1.
		p.Flag = blas.Diagonal
		p.H[0] = p1 / p2
		p.H[3] = x1 / y1
		u = 1 + p.H[0]*p.H[3]
		rd1, rd2 = d2/u, d1/u
		rx1 = y1 / u
		return
	}

	// Now we know that d1 != 0, and d2 != 0. If d2 == 0, it would be caught
	// when p2 == 0, and if d1 == 0, then it is caught above

	if absQ1 > absQ2 {
		p.H[1] = -y1 / x1
		p.H[2] = p2 / p1
		u = 1 - p.H[2]*p.H[1]
		rd1 = d1
		rd2 = d2
		rx1 = x1
		p.Flag = blas.OffDiagonal
		// u must be greater than zero because |q1| > |q2|, so check from netlib
		// is unnecessary
		// This is left in for ease of comparison with complex routines
		//if u > 0 {
		rd1 /= u
		rd2 /= u
		rx1 *= u
		//}
	} else {
		p.Flag = blas.Diagonal
		p.H[0] = p1 / p2
		p.H[3] = x1 / y1
		u = 1 + p.H[0]*p.H[3]
		rd1 = d2 / u
		rd2 = d1 / u
		rx1 = y1 * u
	}

	// Rescale rd1 into (rgamsq, gamsq). The first pass switches the
	// parameters to the full Rescaling form by filling in the entries that
	// the OffDiagonal/Diagonal forms leave implicit.
	// NOTE(review): the loop has no special case for rd1 == 0 or rd1 < 0,
	// which would never leave the range test; presumably unreachable given the
	// checks above — confirm.
	for rd1 <= rgamsq || rd1 >= gamsq {
		if p.Flag == blas.OffDiagonal {
			p.H[0] = 1
			p.H[3] = 1
			p.Flag = blas.Rescaling
		} else if p.Flag == blas.Diagonal {
			p.H[1] = -1
			p.H[2] = 1
			p.Flag = blas.Rescaling
		}
		if rd1 <= rgamsq {
			rd1 *= gam * gam
			rx1 /= gam
			p.H[0] /= gam
			p.H[2] /= gam
		} else {
			rd1 /= gam * gam
			rx1 *= gam
			p.H[0] *= gam
			p.H[2] *= gam
		}
	}

	// Same rescaling for rd2, on its magnitude; it scales the second row of H
	// (H[1], H[3]) and leaves rx1 alone.
	for math.Abs(rd2) <= rgamsq || math.Abs(rd2) >= gamsq {
		if p.Flag == blas.OffDiagonal {
			p.H[0] = 1
			p.H[3] = 1
			p.Flag = blas.Rescaling
		} else if p.Flag == blas.Diagonal {
			p.H[1] = -1
			p.H[2] = 1
			p.Flag = blas.Rescaling
		}
		if math.Abs(rd2) <= rgamsq {
			rd2 *= gam * gam
			p.H[1] /= gam
			p.H[3] /= gam
		} else {
			rd2 /= gam * gam
			p.H[1] *= gam
			p.H[3] *= gam
		}
	}
	return
}
+func (Implementation) Srot(n int, x []float32, incX int, y []float32, incY int, c float32, s float32) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = c*vx+s*vy, c*vy-s*vx + } + return + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + for i := 0; i < n; i++ { + vx := x[ix] + vy := y[iy] + x[ix], y[iy] = c*vx+s*vy, c*vy-s*vx + ix += incX + iy += incY + } +} + +// Srotm applies the modified Givens rotation to the 2×n matrix. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams) { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return + } + panic(negativeN) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + + var h11, h12, h21, h22 float32 + var ix, iy int + switch p.Flag { + case blas.Identity: + return + case blas.Rescaling: + h11 = p.H[0] + h12 = p.H[2] + h21 = p.H[1] + h22 = p.H[3] + case blas.OffDiagonal: + h11 = 1 + h12 = p.H[2] + h21 = p.H[1] + h22 = 1 + case blas.Diagonal: + h11 = p.H[0] + h12 = 1 + h21 = -1 + h22 = p.H[3] + } + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if incX == 1 && incY == 1 { + x = x[:n] + for i, vx := range x { + vy := y[i] + x[i], y[i] = vx*h11+vy*h12, vx*h21+vy*h22 + } + return + } + for i := 0; i < n; i++ { + vx := x[ix] + vy 
:= y[iy] + x[ix], y[iy] = vx*h11+vy*h12, vx*h21+vy*h22 + ix += incX + iy += incY + } + return +} + +// Sscal scales x by alpha. +// x[i] *= alpha +// Sscal has no effect if incX < 0. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) { + if incX < 1 { + if incX == 0 { + panic(zeroIncX) + } + return + } + if (n-1)*incX >= len(x) { + panic(badX) + } + if n < 1 { + if n == 0 { + return + } + panic(negativeN) + } + if alpha == 0 { + if incX == 1 { + x = x[:n] + for i := range x { + x[i] = 0 + } + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] = 0 + } + return + } + if incX == 1 { + f32.ScalUnitary(alpha, x[:n]) + return + } + for ix := 0; ix < n*incX; ix += incX { + x[ix] *= alpha + } +} diff --git a/blas/native/level1single_dsdot.go b/blas/native/level1single_dsdot.go new file mode 100644 index 00000000..a60f81b8 --- /dev/null +++ b/blas/native/level1single_dsdot.go @@ -0,0 +1,53 @@ +// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT. + +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "github.com/gonum/internal/asm/f32" +) + +// Dsdot computes the dot product of the two vectors +// \sum_i x[i]*y[i] +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Dsdot(n int, x []float32, incX int, y []float32, incY int) float64 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(negativeN) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(badLenX) + } + if len(y) < n { + panic(badLenY) + } + return f32.DdotUnitary(x[:n], y) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(badLenX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(badLenY) + } + return f32.DdotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} diff --git a/blas/native/level1single_sdot.go b/blas/native/level1single_sdot.go new file mode 100644 index 00000000..ccce03d5 --- /dev/null +++ b/blas/native/level1single_sdot.go @@ -0,0 +1,53 @@ +// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT. + +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "github.com/gonum/internal/asm/f32" +) + +// Sdot computes the dot product of the two vectors +// \sum_i x[i]*y[i] +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Sdot(n int, x []float32, incX int, y []float32, incY int) float32 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(negativeN) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(badLenX) + } + if len(y) < n { + panic(badLenY) + } + return f32.DotUnitary(x[:n], y) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(badLenX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(badLenY) + } + return f32.DotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) +} diff --git a/blas/native/level1single_sdsdot.go b/blas/native/level1single_sdsdot.go new file mode 100644 index 00000000..b4f04691 --- /dev/null +++ b/blas/native/level1single_sdsdot.go @@ -0,0 +1,53 @@ +// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT. + +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "github.com/gonum/internal/asm/f32" +) + +// Sdsdot computes the dot product of the two vectors plus a constant +// alpha + \sum_i x[i]*y[i] +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32 { + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if n <= 0 { + if n == 0 { + return 0 + } + panic(negativeN) + } + if incX == 1 && incY == 1 { + if len(x) < n { + panic(badLenX) + } + if len(y) < n { + panic(badLenY) + } + return alpha + float32(f32.DdotUnitary(x[:n], y)) + } + var ix, iy int + if incX < 0 { + ix = (-n + 1) * incX + } + if incY < 0 { + iy = (-n + 1) * incY + } + if ix >= len(x) || ix+(n-1)*incX >= len(x) { + panic(badLenX) + } + if iy >= len(y) || iy+(n-1)*incY >= len(y) { + panic(badLenY) + } + return alpha + float32(f32.DdotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))) +} diff --git a/blas/native/level2double.go b/blas/native/level2double.go new file mode 100644 index 00000000..01c172c8 --- /dev/null +++ b/blas/native/level2double.go @@ -0,0 +1,2236 @@ +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "github.com/gonum/blas" + "github.com/gonum/internal/asm/f64" +) + +var _ blas.Float64Level2 = Implementation{} + +// Dgemv computes +// y = alpha * a * x + beta * y if tA = blas.NoTrans +// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. 
+func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + // Set up indexes + lenX := m + lenY := n + if tA == blas.NoTrans { + lenX = n + lenY = m + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic(badY) + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + + // Quick return if possible + if m == 0 || n == 0 || (alpha == 0 && beta == 1) { + return + } + + var kx, ky int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + if incY > 0 { + ky = 0 + } else { + ky = -(lenY - 1) * incY + } + + // First form y := beta * y + if incY > 0 { + Implementation{}.Dscal(lenY, beta, y, incY) + } else { + Implementation{}.Dscal(lenY, beta, y, -incY) + } + + if alpha == 0 { + return + } + + // Form y := alpha * A * x + y + if tA == blas.NoTrans { + if incX == 1 && incY == 1 { + for i := 0; i < m; i++ { + y[i] += alpha * f64.DotUnitary(a[lda*i:lda*i+n], x) + } + return + } + iy := ky + for i := 0; i < m; i++ { + y[iy] += alpha * f64.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0) + iy += incY + } + return + } + // Cases where a is transposed. 
+ if incX == 1 && incY == 1 { + for i := 0; i < m; i++ { + tmp := alpha * x[i] + if tmp != 0 { + f64.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y) + } + } + return + } + ix := kx + for i := 0; i < m; i++ { + tmp := alpha * x[ix] + if tmp != 0 { + f64.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky)) + } + ix += incX + } +} + +// Dger performs the rank-one operation +// A += alpha * x * y^T +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { + // Check inputs + if m < 0 { + panic("m < 0") + } + if n < 0 { + panic(negativeN) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + if lda < max(1, n) { + panic(badLdA) + } + + // Quick return if possible + if m == 0 || n == 0 || alpha == 0 { + return + } + + var ky, kx int + if incY > 0 { + ky = 0 + } else { + ky = -(n - 1) * incY + } + + if incX > 0 { + kx = 0 + } else { + kx = -(m - 1) * incX + } + + if incX == 1 && incY == 1 { + x = x[:m] + y = y[:n] + for i, xv := range x { + tmp := alpha * xv + if tmp != 0 { + atmp := a[i*lda : i*lda+n] + f64.AxpyUnitaryTo(atmp, tmp, y, atmp) + } + } + return + } + + ix := kx + for i := 0; i < m; i++ { + tmp := alpha * x[ix] + if tmp != 0 { + f64.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0) + } + ix += incX + } +} + +// Dgbmv computes +// y = alpha * A * x + beta * y if tA == blas.NoTrans +// y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans +// where a is an m×n band matrix kL subdiagonals and kU super-diagonals, and +// m and n refer to the size of the full dense 
matrix it represents. +// x and y are vectors, and alpha and beta are scalars. +func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if kL < 0 { + panic(kLLT0) + } + if kL < 0 { + panic(kULT0) + } + if lda < kL+kU+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + // Set up indexes + lenX := m + lenY := n + if tA == blas.NoTrans { + lenX = n + lenY = m + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic(badY) + } + if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 { + panic(badLdA) + } + + // Quick return if possible + if m == 0 || n == 0 || (alpha == 0 && beta == 1) { + return + } + + var kx, ky int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + if incY > 0 { + ky = 0 + } else { + ky = -(lenY - 1) * incY + } + + // First form y := beta * y + if incY > 0 { + Implementation{}.Dscal(lenY, beta, y, incY) + } else { + Implementation{}.Dscal(lenY, beta, y, -incY) + } + + if alpha == 0 { + return + } + + // i and j are indices of the compacted banded matrix. 
+ // off is the offset into the dense matrix (off + j = densej) + ld := min(m, n) + nCol := kU + 1 + kL + if tA == blas.NoTrans { + iy := ky + if incX == 1 { + for i := 0; i < m; i++ { + l := max(0, kL-i) + u := min(nCol, ld+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + xtmp := x[off : off+u-l] + var sum float64 + for j, v := range atmp { + sum += xtmp[j] * v + } + y[iy] += sum * alpha + iy += incY + } + return + } + for i := 0; i < m; i++ { + l := max(0, kL-i) + u := min(nCol, ld+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + jx := kx + var sum float64 + for _, v := range atmp { + sum += x[off*incX+jx] * v + jx += incX + } + y[iy] += sum * alpha + iy += incY + } + return + } + if incX == 1 { + for i := 0; i < m; i++ { + l := max(0, kL-i) + u := min(nCol, ld+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + tmp := alpha * x[i] + jy := ky + for _, v := range atmp { + y[jy+off*incY] += tmp * v + jy += incY + } + } + return + } + ix := kx + for i := 0; i < m; i++ { + l := max(0, kL-i) + u := min(nCol, ld+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + tmp := alpha * x[ix] + jy := ky + for _, v := range atmp { + y[jy+off*incY] += tmp * v + jy += incY + } + ix += incX + } +} + +// Dtrmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// A is an n×n Triangular matrix and x is a vector. 
func (Implementation) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) {
	// Validate the arguments; each failure panics with the matching message.
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if n < 0 {
		panic(nLT0)
	}
	if lda < n {
		panic(badLdA)
	}
	if incX == 0 {
		panic(zeroIncX)
	}
	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
		panic(badX)
	}
	if lda*(n-1)+n > len(a) || lda < max(1, n) {
		panic(badLdA)
	}
	if n == 0 {
		return
	}
	// With a unit diagonal the stored diagonal entries of a are never read.
	nonUnit := d != blas.Unit
	if n == 1 {
		if nonUnit {
			x[0] *= a[0]
		}
		return
	}
	// kx is the slice index of the first logical element of x. incX is
	// non-zero here, so the branch is taken only for negative increments.
	var kx int
	if incX <= 0 {
		kx = -(n - 1) * incX
	}
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			// Rows are visited top-down: row i reads only x[i:], which has
			// not been overwritten yet.
			if incX == 1 {
				for i := 0; i < n; i++ {
					ilda := i * lda
					var tmp float64
					if nonUnit {
						tmp = a[ilda+i] * x[i]
					} else {
						tmp = x[i]
					}
					xtmp := x[i+1:]
					x[i] = tmp + f64.DotUnitary(a[ilda+i+1:ilda+n], xtmp)
				}
				return
			}
			// NOTE(review): uintptr(incX) wraps for negative incX; presumably
			// f64.DotInc relies on wrap-around index arithmetic — confirm.
			ix := kx
			for i := 0; i < n; i++ {
				ilda := i * lda
				var tmp float64
				if nonUnit {
					tmp = a[ilda+i] * x[ix]
				} else {
					tmp = x[ix]
				}
				x[ix] = tmp + f64.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0)
				ix += incX
			}
			return
		}
		// Lower triangular: rows are visited bottom-up so that the leading
		// elements of x read by row i are still the original values.
		if incX == 1 {
			for i := n - 1; i >= 0; i-- {
				ilda := i * lda
				var tmp float64
				if nonUnit {
					// tmp is zero at this point, so += acts as a plain assignment.
					tmp += a[ilda+i] * x[i]
				} else {
					tmp = x[i]
				}
				x[i] = tmp + f64.DotUnitary(a[ilda:ilda+i], x)
			}
			return
		}
		ix := kx + (n-1)*incX
		for i := n - 1; i >= 0; i-- {
			ilda := i * lda
			var tmp float64
			if nonUnit {
				tmp = a[ilda+i] * x[ix]
			} else {
				tmp = x[ix]
			}
			x[ix] = tmp + f64.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0)
			ix -= incX
		}
		return
	}
	// Cases where a is transposed.
	// Row i of a is scattered into the other elements of x before x[i] itself
	// is scaled by the diagonal, so xi is always the original value.
	if ul == blas.Upper {
		if incX == 1 {
			for i := n - 1; i >= 0; i-- {
				ilda := i * lda
				xi := x[i]
				f64.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n])
				if nonUnit {
					x[i] *= a[ilda+i]
				}
			}
			return
		}
		ix := kx + (n-1)*incX
		for i := n - 1; i >= 0; i-- {
			ilda := i * lda
			xi := x[ix]
			f64.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX))
			if nonUnit {
				x[ix] *= a[ilda+i]
			}
			ix -= incX
		}
		return
	}
	if incX == 1 {
		for i := 0; i < n; i++ {
			ilda := i * lda
			xi := x[i]
			f64.AxpyUnitary(xi, a[ilda:ilda+i], x)
			if nonUnit {
				x[i] *= a[i*lda+i]
			}
		}
		return
	}
	ix := kx
	for i := 0; i < n; i++ {
		ilda := i * lda
		xi := x[ix]
		f64.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx))
		if nonUnit {
			x[ix] *= a[ilda+i]
		}
		ix += incX
	}
}

// Dtrsv solves
//  A * x = b if tA == blas.NoTrans
//  A^T * x = b if tA == blas.Trans or blas.ConjTrans
// A is an n×n triangular matrix and x is a vector.
// At entry to the function, x contains the values of b, and the result is
// stored in place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func (Implementation) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) {
	// Test the input parameters
	// Verify inputs
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if n < 0 {
		panic(nLT0)
	}
	if lda*(n-1)+n > len(a) || lda < max(1, n) {
		panic(badLdA)
	}
	if incX == 0 {
		panic(zeroIncX)
	}
	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
		panic(badX)
	}
	// Quick return if possible
	if n == 0 {
		return
	}
	if n == 1 {
		if d == blas.NonUnit {
			x[0] /= a[0]
		}
		return
	}

	// kx is the slice index of the first logical element of x; it is
	// non-zero only when x is traversed backwards.
	var kx int
	if incX < 0 {
		kx = -(n - 1) * incX
	}
	nonUnit := d == blas.NonUnit
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			// Back substitution: start at the last row, whose unknown depends
			// on nothing else, and work upwards.
			if incX == 1 {
				for i := n - 1; i >= 0; i-- {
					var sum float64
					atmp := a[i*lda+i+1 : i*lda+n]
					for j, v := range atmp {
						jv := i + j + 1
						sum += x[jv] * v
					}
					x[i] -= sum
					if nonUnit {
						x[i] /= a[i*lda+i]
					}
				}
				return
			}
			ix := kx + (n-1)*incX
			for i := n - 1; i >= 0; i-- {
				var sum float64
				jx := ix + incX
				atmp := a[i*lda+i+1 : i*lda+n]
				for _, v := range atmp {
					sum += x[jx] * v
					jx += incX
				}
				x[ix] -= sum
				if nonUnit {
					x[ix] /= a[i*lda+i]
				}
				ix -= incX
			}
			return
		}
		// Lower triangular: forward substitution from the first row down.
		if incX == 1 {
			for i := 0; i < n; i++ {
				var sum float64
				atmp := a[i*lda : i*lda+i]
				for j, v := range atmp {
					sum += x[j] * v
				}
				x[i] -= sum
				if nonUnit {
					x[i] /= a[i*lda+i]
				}
			}
			return
		}
		ix := kx
		for i := 0; i < n; i++ {
			jx := kx
			var sum float64
			atmp := a[i*lda : i*lda+i]
			for _, v := range atmp {
				sum += x[jx] * v
				jx += incX
			}
			x[ix] -= sum
			if nonUnit {
				x[ix] /= a[i*lda+i]
			}
			ix += incX
		}
		return
	}
	// Cases where a is transposed.
	// Each unknown is finalised first and its contribution is then
	// subtracted from the remaining right-hand-side entries using row i of a.
	if ul == blas.Upper {
		if incX == 1 {
			for i := 0; i < n; i++ {
				if nonUnit {
					x[i] /= a[i*lda+i]
				}
				xi := x[i]
				atmp := a[i*lda+i+1 : i*lda+n]
				for j, v := range atmp {
					jv := j + i + 1
					x[jv] -= v * xi
				}
			}
			return
		}
		ix := kx
		for i := 0; i < n; i++ {
			if nonUnit {
				x[ix] /= a[i*lda+i]
			}
			xi := x[ix]
			jx := kx + (i+1)*incX
			atmp := a[i*lda+i+1 : i*lda+n]
			for _, v := range atmp {
				x[jx] -= v * xi
				jx += incX
			}
			ix += incX
		}
		return
	}
	if incX == 1 {
		for i := n - 1; i >= 0; i-- {
			if nonUnit {
				x[i] /= a[i*lda+i]
			}
			xi := x[i]
			atmp := a[i*lda : i*lda+i]
			for j, v := range atmp {
				x[j] -= v * xi
			}
		}
		return
	}
	ix := kx + (n-1)*incX
	for i := n - 1; i >= 0; i-- {
		if nonUnit {
			x[ix] /= a[i*lda+i]
		}
		xi := x[ix]
		jx := kx
		atmp := a[i*lda : i*lda+i]
		for _, v := range atmp {
			x[jx] -= v * xi
			jx += incX
		}
		ix -= incX
	}
}

// Dsymv computes
//  y = alpha * A * x + beta * y,
// where a is an n×n symmetric matrix, x and y are vectors, and alpha and
// beta are scalars.
func (Implementation) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
	// Check inputs
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if n < 0 {
		panic(negativeN)
	}
	if lda > 1 && lda < n {
		panic(badLdA)
	}
	if incX == 0 {
		panic(zeroIncX)
	}
	if incY == 0 {
		panic(zeroIncY)
	}
	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
		panic(badX)
	}
	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
		panic(badY)
	}
	if lda*(n-1)+n > len(a) || lda < max(1, n) {
		panic(badLdA)
	}
	// Quick return if possible
	if n == 0 || (alpha == 0 && beta == 1) {
		return
	}

	// Set up start points
	var kx, ky int
	if incX > 0 {
		kx = 0
	} else {
		kx = -(n - 1) * incX
	}
	if incY > 0 {
		ky = 0
	} else {
		ky = -(n - 1) * incY
	}

	// Form y = beta * y
	if beta != 1 {
		if incY > 0 {
			Implementation{}.Dscal(n, beta, y, incY)
		} else {
			Implementation{}.Dscal(n, beta, y, -incY)
		}
	}

	if alpha == 0 {
		return
	}

	if n == 1 {
		y[0] += alpha * a[0] * x[0]
		return
	}

	// Only one triangle of a is read. Each stored off-diagonal element v at
	// (i, j) is used twice: once for row i (accumulated in sum) and once for
	// its mirror image in row j (added straight into y).
	if ul == blas.Upper {
		if incX == 1 {
			iy := ky
			for i := 0; i < n; i++ {
				xv := x[i] * alpha
				sum := x[i] * a[i*lda+i]
				jy := ky + (i+1)*incY
				atmp := a[i*lda+i+1 : i*lda+n]
				for j, v := range atmp {
					jp := j + i + 1
					sum += x[jp] * v
					y[jy] += xv * v
					jy += incY
				}
				y[iy] += alpha * sum
				iy += incY
			}
			return
		}
		ix := kx
		iy := ky
		for i := 0; i < n; i++ {
			xv := x[ix] * alpha
			sum := x[ix] * a[i*lda+i]
			jx := kx + (i+1)*incX
			jy := ky + (i+1)*incY
			atmp := a[i*lda+i+1 : i*lda+n]
			for _, v := range atmp {
				sum += x[jx] * v
				y[jy] += xv * v
				jx += incX
				jy += incY
			}
			y[iy] += alpha * sum
			ix += incX
			iy += incY
		}
		return
	}
	// Cases where a is lower triangular.
	if incX == 1 {
		iy := ky
		for i := 0; i < n; i++ {
			jy := ky
			xv := alpha * x[i]
			atmp := a[i*lda : i*lda+i]
			var sum float64
			for j, v := range atmp {
				sum += x[j] * v
				y[jy] += xv * v
				jy += incY
			}
			sum += x[i] * a[i*lda+i]
			sum *= alpha
			y[iy] += sum
			iy += incY
		}
		return
	}
	ix := kx
	iy := ky
	for i := 0; i < n; i++ {
		jx := kx
		jy := ky
		xv := alpha * x[ix]
		atmp := a[i*lda : i*lda+i]
		var sum float64
		for _, v := range atmp {
			sum += x[jx] * v
			y[jy] += xv * v
			jx += incX
			jy += incY
		}
		sum += x[ix] * a[i*lda+i]
		sum *= alpha
		y[iy] += sum
		ix += incX
		iy += incY
	}
}

// Dtbmv computes
//  x = A * x if tA == blas.NoTrans
//  x = A^T * x if tA == blas.Trans or blas.ConjTrans
// where A is an n×n triangular banded matrix with k diagonals, and x is a vector.
func (Implementation) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) {
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if n < 0 {
		panic(nLT0)
	}
	if k < 0 {
		panic(kLT0)
	}
	if lda*(n-1)+k+1 > len(a) || lda < k+1 {
		panic(badLdA)
	}
	if incX == 0 {
		panic(zeroIncX)
	}
	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
		panic(badX)
	}
	if n == 0 {
		return
	}
	// kx is the slice index of the first logical element of x; the else
	// branch only restates the zero value.
	var kx int
	if incX <= 0 {
		kx = -(n - 1) * incX
	} else if incX != 1 {
		kx = 0
	}

	nonunit := d != blas.Unit

	// In the upper layout the diagonal of row i is at a[i*lda] and the
	// super-diagonals follow it; in the lower layout the diagonal is at
	// a[i*lda+k] and the sub-diagonals precede it.
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			if incX == 1 {
				for i := 0; i < n; i++ {
					// u clips the band at the right edge of the matrix.
					u := min(1+k, n-i)
					var sum float64
					atmp := a[i*lda:]
					xtmp := x[i:]
					for j := 1; j < u; j++ {
						sum += xtmp[j] * atmp[j]
					}
					if nonunit {
						sum += xtmp[0] * atmp[0]
					} else {
						sum += xtmp[0]
					}
					x[i] = sum
				}
				return
			}
			ix := kx
			for i := 0; i < n; i++ {
				u := min(1+k, n-i)
				var sum float64
				atmp := a[i*lda:]
				jx := incX
				for j := 1; j < u; j++ {
					sum += x[ix+jx] * atmp[j]
					jx += incX
				}
				if nonunit {
					sum += x[ix] * atmp[0]
				} else {
					sum += x[ix]
				}
				x[ix] = sum
				ix += incX
			}
			return
		}
		if incX == 1 {
			for i := n - 1; i >= 0; i-- {
				// l clips the band at the left edge of the matrix.
				l := max(0, k-i)
				atmp := a[i*lda:]
				var sum float64
				for j := l; j < k; j++ {
					sum += x[i-k+j] * atmp[j]
				}
				if nonunit {
					sum += x[i] * atmp[k]
				} else {
					sum += x[i]
				}
				x[i] = sum
			}
			return
		}
		ix := kx + (n-1)*incX
		for i := n - 1; i >= 0; i-- {
			l := max(0, k-i)
			atmp := a[i*lda:]
			var sum float64
			jx := l * incX
			for j := l; j < k; j++ {
				sum += x[ix-k*incX+jx] * atmp[j]
				jx += incX
			}
			if nonunit {
				sum += x[ix] * atmp[k]
			} else {
				sum += x[ix]
			}
			x[ix] = sum
			ix -= incX
		}
		return
	}
	// Cases where a is transposed: element i of the result gathers column i
	// of the stored triangle, so a is indexed by the row that holds each term.
	if ul == blas.Upper {
		if incX == 1 {
			for i := n - 1; i >= 0; i-- {
				u := k + 1
				if i < u {
					u = i + 1
				}
				var sum float64
				for j := 1; j < u; j++ {
					sum += x[i-j] * a[(i-j)*lda+j]
				}
				if nonunit {
					sum += x[i] * a[i*lda]
				} else {
					sum += x[i]
				}
				x[i] = sum
			}
			return
		}
		ix := kx + (n-1)*incX
		for i := n - 1; i >= 0; i-- {
			u := k + 1
			if i < u {
				u = i + 1
			}
			var sum float64
			jx := incX
			for j := 1; j < u; j++ {
				sum += x[ix-jx] * a[(i-j)*lda+j]
				jx += incX
			}
			if nonunit {
				sum += x[ix] * a[i*lda]
			} else {
				sum += x[ix]
			}
			x[ix] = sum
			ix -= incX
		}
		return
	}
	if incX == 1 {
		for i := 0; i < n; i++ {
			u := k
			if i+k >= n {
				u = n - i - 1
			}
			var sum float64
			for j := 0; j < u; j++ {
				sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1]
			}
			if nonunit {
				sum += x[i] * a[i*lda+k]
			} else {
				sum += x[i]
			}
			x[i] = sum
		}
		return
	}
	ix := kx
	for i := 0; i < n; i++ {
		u := k
		if i+k >= n {
			u = n - i - 1
		}
		var (
			sum float64
			jx  int
		)
		for j := 0; j < u; j++ {
			sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1]
			jx += incX
		}
		if nonunit {
			sum += x[ix] * a[i*lda+k]
		} else {
			sum += x[ix]
		}
		x[ix] = sum
		ix += incX
	}
}

// Dtpmv computes
//  x = A * x if tA == blas.NoTrans
//  x = A^T * x if tA == blas.Trans or blas.ConjTrans
// where A is an n×n triangular matrix in packed format, and x is a vector.
// The diagonal of A is read only when d is blas.NonUnit.
func (Implementation) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) {
	// Verify inputs
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if n < 0 {
		panic(nLT0)
	}
	if len(ap) < (n*(n+1))/2 {
		panic(badLdA)
	}
	if incX == 0 {
		panic(zeroIncX)
	}
	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
		panic(badX)
	}
	if n == 0 {
		return
	}
	var kx int
	if incX <= 0 {
		kx = -(n - 1) * incX
	}

	nonUnit := d == blas.NonUnit
	// Packed rows are stored back to back. In the upper layout row i holds
	// n-i elements starting at the diagonal; in the lower layout row i holds
	// i+1 elements ending at the diagonal. The offset updates below step from
	// one diagonal element to the next accordingly.
	var offset int // Offset is the index of (i,i)
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			if incX == 1 {
				for i := 0; i < n; i++ {
					xi := x[i]
					if nonUnit {
						xi *= ap[offset]
					}
					atmp := ap[offset+1 : offset+n-i]
					xtmp := x[i+1:]
					for j, v := range atmp {
						xi += v * xtmp[j]
					}
					x[i] = xi
					offset += n - i
				}
				return
			}
			ix := kx
			for i := 0; i < n; i++ {
				xix := x[ix]
				if nonUnit {
					xix *= ap[offset]
				}
				atmp := ap[offset+1 : offset+n-i]
				jx := kx + (i+1)*incX
				for _, v := range atmp {
					xix += v * x[jx]
					jx += incX
				}
				x[ix] = xix
				offset += n - i
				ix += incX
			}
			return
		}
		if incX == 1 {
			// Start at the last diagonal element and walk rows bottom-up.
			offset = n*(n+1)/2 - 1
			for i := n - 1; i >= 0; i-- {
				xi := x[i]
				if nonUnit {
					xi *= ap[offset]
				}
				atmp := ap[offset-i : offset]
				for j, v := range atmp {
					xi += v * x[j]
				}
				x[i] = xi
				offset -= i + 1
			}
			return
		}
		ix := kx + (n-1)*incX
		offset = n*(n+1)/2 - 1
		for i := n - 1; i >= 0; i-- {
			xix := x[ix]
			if nonUnit {
				xix *= ap[offset]
			}
			atmp := ap[offset-i : offset]
			jx := kx
			for _, v := range atmp {
				xix += v * x[jx]
				jx += incX
			}
			x[ix] = xix
			offset -= i + 1
			ix -= incX
		}
		return
	}
	// Cases where ap is transposed.
	// The original x[i] is scattered through row i before x[i] is scaled by
	// the diagonal.
	if ul == blas.Upper {
		if incX == 1 {
			offset = n*(n+1)/2 - 1
			for i := n - 1; i >= 0; i-- {
				xi := x[i]
				atmp := ap[offset+1 : offset+n-i]
				xtmp := x[i+1:]
				for j, v := range atmp {
					xtmp[j] += v * xi
				}
				if nonUnit {
					x[i] *= ap[offset]
				}
				offset -= n - i + 1
			}
			return
		}
		ix := kx + (n-1)*incX
		offset = n*(n+1)/2 - 1
		for i := n - 1; i >= 0; i-- {
			xix := x[ix]
			jx := kx + (i+1)*incX
			atmp := ap[offset+1 : offset+n-i]
			for _, v := range atmp {
				x[jx] += v * xix
				jx += incX
			}
			if nonUnit {
				x[ix] *= ap[offset]
			}
			offset -= n - i + 1
			ix -= incX
		}
		return
	}
	if incX == 1 {
		for i := 0; i < n; i++ {
			xi := x[i]
			atmp := ap[offset-i : offset]
			for j, v := range atmp {
				x[j] += v * xi
			}
			if nonUnit {
				x[i] *= ap[offset]
			}
			offset += i + 2
		}
		return
	}
	ix := kx
	for i := 0; i < n; i++ {
		xix := x[ix]
		jx := kx
		atmp := ap[offset-i : offset]
		for _, v := range atmp {
			x[jx] += v * xix
			jx += incX
		}
		if nonUnit {
			x[ix] *= ap[offset]
		}
		ix += incX
		offset += i + 2
	}
}

// Dtbsv solves
//  A * x = b if tA == blas.NoTrans
//  A^T * x = b if tA == blas.Trans or blas.ConjTrans
// where A is an n×n triangular banded matrix with k diagonals stored in band
// format, and x is a vector.
// At entry to the function, x contains the values of b, and the result is
// stored in place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
+func (Implementation) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if n == 0 { + return + } + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } else { + kx = 0 + } + nonUnit := d == blas.NonUnit + // Form x = A^-1 x. + // Several cases below use subslices for speed improvement. + // The incX != 1 cases usually do not because incX may be negative. + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + atmp := a[i*lda+1:] + xtmp := x[i+1 : i+bands+1] + var sum float64 + for j, v := range xtmp { + sum += v * atmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + max := k + 1 + if i+max > n { + max = n - i + } + atmp := a[i*lda:] + var ( + jx int + sum float64 + ) + for j := 1; j < max; j++ { + jx += incX + sum += x[ix+jx] * atmp[j] + } + x[ix] -= sum + if nonUnit { + x[ix] /= atmp[0] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + atmp := a[i*lda+k-bands:] + xtmp := x[i-bands : i] + var sum float64 + for j, v := range xtmp { + sum += v * atmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= atmp[bands] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + atmp := a[i*lda+k-bands:] + var ( + sum float64 + jx int + ) + for j := 0; j < bands; j++ { 
+ sum += x[ix-bands*incX+jx] * atmp[j] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= atmp[bands] + } + ix += incX + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + var sum float64 + for j := 0; j < bands; j++ { + sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + var ( + sum float64 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda] + } + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + var sum float64 + xtmp := x[i+1 : i+1+bands] + for j, v := range xtmp { + sum += v * a[(i+j+1)*lda+k-j-1] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+k] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + var ( + sum float64 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+k] + } + ix -= incX + } +} + +// Dsbmv performs +// y = alpha * A * x + beta * y +// where A is an n×n symmetric banded matrix, x and y are vectors, and alpha +// and beta are scalars. 
+func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic(badLdA) + } + + // Quick return if possible + if n == 0 || (alpha == 0 && beta == 1) { + return + } + + // Set up indexes + lenX := n + lenY := n + var kx, ky int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + if incY > 0 { + ky = 0 + } else { + ky = -(lenY - 1) * incY + } + + // First form y := beta * y + if incY > 0 { + Implementation{}.Dscal(lenY, beta, y, incY) + } else { + Implementation{}.Dscal(lenY, beta, y, -incY) + } + + if alpha == 0 { + return + } + + if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + atmp := a[i*lda:] + tmp := alpha * x[i] + sum := tmp * atmp[0] + u := min(k, n-i-1) + jy := incY + for j := 1; j <= u; j++ { + v := atmp[j] + sum += alpha * x[i+j] * v + y[iy+jy] += tmp * v + jy += incY + } + y[iy] += sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + atmp := a[i*lda:] + tmp := alpha * x[ix] + sum := tmp * atmp[0] + u := min(k, n-i-1) + jx := incX + jy := incY + for j := 1; j <= u; j++ { + v := atmp[j] + sum += alpha * x[ix+jx] * v + y[iy+jy] += tmp * v + jx += incX + jy += incY + } + y[iy] += sum + ix += incX + iy += incY + } + return + } + + // Casses where a has bands below the diagonal. 
+ if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + l := max(0, k-i) + tmp := alpha * x[i] + jy := l * incY + atmp := a[i*lda:] + for j := l; j < k; j++ { + v := atmp[j] + y[iy] += alpha * v * x[i-k+j] + y[iy-k*incY+jy] += tmp * v + jy += incY + } + y[iy] += tmp * atmp[k] + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + l := max(0, k-i) + tmp := alpha * x[ix] + jx := l * incX + jy := l * incY + atmp := a[i*lda:] + for j := l; j < k; j++ { + v := atmp[j] + y[iy] += alpha * v * x[ix-k*incX+jx] + y[iy-k*incY+jy] += tmp * v + jx += incX + jy += incY + } + y[iy] += tmp * atmp[k] + ix += incX + iy += incY + } + return +} + +// Dsyr performs the rank-one update +// a += alpha * x * x^T +// where a is an n×n symmetric matrix, and x is a vector. +func (Implementation) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + if alpha == 0 || n == 0 { + return + } + + lenX := n + var kx int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + tmp := x[i] * alpha + if tmp != 0 { + atmp := a[i*lda+i : i*lda+n] + xtmp := x[i:n] + for j, v := range xtmp { + atmp[j] += v * tmp + } + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + tmp := x[ix] * alpha + if tmp != 0 { + jx := ix + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += x[jx] * tmp + jx += incX + } + } + ix += incX + } + return + } + // Cases where a is lower triangular. 
+ if incX == 1 { + for i := 0; i < n; i++ { + tmp := x[i] * alpha + if tmp != 0 { + atmp := a[i*lda:] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += tmp * v + } + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + tmp := x[ix] * alpha + if tmp != 0 { + atmp := a[i*lda:] + jx := kx + for j := 0; j < i+1; j++ { + atmp[j] += tmp * x[jx] + jx += incX + } + } + ix += incX + } +} + +// Dsyr2 performs the symmetric rank-two update +// A += alpha * x * y^T + alpha * y * x^T +// where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar. +func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + if alpha == 0 { + return + } + + var ky, kx int + if incY > 0 { + ky = 0 + } else { + ky = -(n - 1) * incY + } + if incX > 0 { + kx = 0 + } else { + kx = -(n - 1) * incX + } + if ul == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + xi := x[i] + yi := y[i] + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += alpha * (xi*y[j] + x[j]*yi) + } + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + i*incX + jy := ky + i*incY + xi := x[ix] + yi := y[iy] + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + } + return + } + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + xi := x[i] + yi := y[i] + atmp := a[i*lda:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (xi*y[j] + x[j]*yi) + } + } + 
return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + xi := x[ix] + yi := y[iy] + atmp := a[i*lda:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + } + return +} + +// Dtpsv solves +// A * x = b if tA == blas.NoTrans +// A^T * x = b if tA == blas.Trans or blas.ConjTrans +// where A is an n×n triangular matrix in packed format and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +func (Implementation) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { + // Verify inputs + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if len(ap) < (n*(n+1))/2 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if n == 0 { + return + } + var kx int + if incX <= 0 { + kx = -(n - 1) * incX + } + + nonUnit := d == blas.NonUnit + var offset int // Offset is the index of (i,i) + if tA == blas.NoTrans { + if ul == blas.Upper { + offset = n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + var sum float64 + for j, v := range atmp { + sum += v * xtmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= ap[offset] + } + offset -= n - i + 1 + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + var sum float64 + for _, v := range atmp { + sum += v * x[jx] + jx += incX + } + x[ix] 
-= sum + if nonUnit { + x[ix] /= ap[offset] + } + ix -= incX + offset -= n - i + 1 + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i : offset] + var sum float64 + for j, v := range atmp { + sum += v * x[j] + } + x[i] -= sum + if nonUnit { + x[i] /= ap[offset] + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + atmp := ap[offset-i : offset] + var sum float64 + for _, v := range atmp { + sum += v * x[jx] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= ap[offset] + } + ix += incX + offset += i + 2 + } + return + } + // Cases where ap is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if nonUnit { + x[i] /= ap[offset] + } + xi := x[i] + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xtmp[j] -= v * xi + } + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + if nonUnit { + x[ix] /= ap[offset] + } + xix := x[ix] + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + for _, v := range atmp { + x[jx] -= v * xix + jx += incX + } + ix += incX + offset += n - i + } + return + } + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[i] /= ap[offset] + } + xi := x[i] + atmp := ap[offset-i : offset] + for j, v := range atmp { + x[j] -= v * xi + } + offset -= i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[ix] /= ap[offset] + } + xix := x[ix] + atmp := ap[offset-i : offset] + jx := kx + for _, v := range atmp { + x[jx] -= v * xix + jx += incX + } + ix -= incX + offset -= i + 1 + } +} + +// Dspmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n symmetric matrix in packed format, x and y are vectors +// and alpha and beta are scalars. 
+func (Implementation) Dspmv(ul blas.Uplo, n int, alpha float64, a []float64, x []float64, incX int, beta float64, y []float64, incY int) { + // Verify inputs + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if len(a) < (n*(n+1))/2 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + // Quick return if possible + if n == 0 || (alpha == 0 && beta == 1) { + return + } + + // Set up start points + var kx, ky int + if incX > 0 { + kx = 0 + } else { + kx = -(n - 1) * incX + } + if incY > 0 { + ky = 0 + } else { + ky = -(n - 1) * incY + } + + // Form y = beta * y + if beta != 1 { + if incY > 0 { + Implementation{}.Dscal(n, beta, y, incY) + } else { + Implementation{}.Dscal(n, beta, y, -incY) + } + } + + if alpha == 0 { + return + } + + if n == 1 { + y[0] += alpha * a[0] * x[0] + return + } + var offset int // Offset is the index of (i,i). 
+ if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + sum := a[offset] * x[i] + atmp := a[offset+1 : offset+n-i] + xtmp := x[i+1:] + jy := ky + (i+1)*incY + for j, v := range atmp { + sum += v * xtmp[j] + y[jy] += v * xv + jy += incY + } + y[iy] += alpha * sum + iy += incY + offset += n - i + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + sum := a[offset] * x[ix] + atmp := a[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + jy := ky + (i+1)*incY + for _, v := range atmp { + sum += v * x[jx] + y[jy] += v * xv + jx += incX + jy += incY + } + y[iy] += alpha * sum + ix += incX + iy += incY + offset += n - i + } + return + } + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + atmp := a[offset-i : offset] + jy := ky + var sum float64 + for j, v := range atmp { + sum += v * x[j] + y[jy] += v * xv + jy += incY + } + sum += a[offset] * x[i] + y[iy] += alpha * sum + iy += incY + offset += i + 2 + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + atmp := a[offset-i : offset] + jx := kx + jy := ky + var sum float64 + for _, v := range atmp { + sum += v * x[jx] + y[jy] += v * xv + jx += incX + jy += incY + } + + sum += a[offset] * x[ix] + y[iy] += alpha * sum + ix += incX + iy += incY + offset += i + 2 + } +} + +// Dspr computes the rank-one operation +// a += alpha * x * x^T +// where a is an n×n symmetric matrix in packed format, x is a vector, and +// alpha is a scalar. 
+func (Implementation) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if len(a) < (n*(n+1))/2 { + panic(badLdA) + } + if alpha == 0 || n == 0 { + return + } + lenX := n + var kx int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + var offset int // Offset is the index of (i,i). + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + atmp := a[offset:] + xv := alpha * x[i] + xtmp := x[i:n] + for j, v := range xtmp { + atmp[j] += xv * v + } + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + i*incX + atmp := a[offset:] + xv := alpha * x[ix] + for j := 0; j < n-i; j++ { + atmp[j] += xv * x[jx] + jx += incX + } + ix += incX + offset += n - i + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + atmp := a[offset-i:] + xv := alpha * x[i] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += xv * v + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + atmp := a[offset-i:] + xv := alpha * x[ix] + for j := 0; j <= i; j++ { + atmp[j] += xv * x[jx] + jx += incX + } + ix += incX + offset += i + 2 + } +} + +// Dspr2 performs the symmetric rank-2 update +// A += alpha * x * y^T + alpha * y * x^T, +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha is a scalar. 
+func (Implementation) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, ap []float64) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if len(ap) < (n*(n+1))/2 { + panic(badLdA) + } + if alpha == 0 { + return + } + var ky, kx int + if incY > 0 { + ky = 0 + } else { + ky = -(n - 1) * incY + } + if incX > 0 { + kx = 0 + } else { + kx = -(n - 1) * incX + } + var offset int // Offset is the index of (i,i). + if ul == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset:] + xi := x[i] + yi := y[i] + xtmp := x[i:n] + ytmp := y[i:n] + for j, v := range xtmp { + atmp[j] += alpha * (xi*ytmp[j] + v*yi) + } + offset += n - i + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + i*incX + jy := ky + i*incY + atmp := ap[offset:] + xi := x[ix] + yi := y[iy] + for j := 0; j < n-i; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + offset += n - i + } + return + } + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i:] + xi := x[i] + yi := y[i] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += alpha * (xi*y[j] + v*yi) + } + offset += i + 2 + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + atmp := ap[offset-i:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) + jx += incX + jy += incY + } + ix += incX + iy += incY + offset += i + 2 + } +} diff --git a/blas/native/level2double_test.go b/blas/native/level2double_test.go new file mode 100644 index 00000000..d5593b0e --- /dev/null +++ 
b/blas/native/level2double_test.go @@ -0,0 +1,79 @@ +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "testing" + + "github.com/gonum/blas/testblas" +) + +func TestDgemv(t *testing.T) { + testblas.DgemvTest(t, impl) +} + +func TestDger(t *testing.T) { + testblas.DgerTest(t, impl) +} + +func TestDtxmv(t *testing.T) { + testblas.DtxmvTest(t, impl) +} + +func TestDgbmv(t *testing.T) { + testblas.DgbmvTest(t, impl) +} + +func TestDtbsv(t *testing.T) { + testblas.DtbsvTest(t, impl) +} + +func TestDsbmv(t *testing.T) { + testblas.DsbmvTest(t, impl) +} + +func TestDtbmv(t *testing.T) { + testblas.DtbmvTest(t, impl) +} + +func TestDtrsv(t *testing.T) { + testblas.DtrsvTest(t, impl) +} + +func TestDtrmv(t *testing.T) { + testblas.DtrmvTest(t, impl) +} + +func TestDsymv(t *testing.T) { + testblas.DsymvTest(t, impl) +} + +func TestDsyr(t *testing.T) { + testblas.DsyrTest(t, impl) +} + +func TestDsyr2(t *testing.T) { + testblas.Dsyr2Test(t, impl) +} + +func TestDspr2(t *testing.T) { + testblas.Dspr2Test(t, impl) +} + +func TestDspr(t *testing.T) { + testblas.DsprTest(t, impl) +} + +func TestDspmv(t *testing.T) { + testblas.DspmvTest(t, impl) +} + +func TestDtpsv(t *testing.T) { + testblas.DtpsvTest(t, impl) +} + +func TestDtpmv(t *testing.T) { + testblas.DtpmvTest(t, impl) +} diff --git a/blas/native/level2single.go b/blas/native/level2single.go new file mode 100644 index 00000000..cf66b451 --- /dev/null +++ b/blas/native/level2single.go @@ -0,0 +1,2270 @@ +// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT. + +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package native + +import ( + "github.com/gonum/blas" + "github.com/gonum/internal/asm/f32" +) + +var _ blas.Float32Level2 = Implementation{} + +// Sgemv computes +// y = alpha * a * x + beta * y if tA = blas.NoTrans +// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if lda < max(1, n) { + panic(badLdA) + } + + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + // Set up indexes + lenX := m + lenY := n + if tA == blas.NoTrans { + lenX = n + lenY = m + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic(badY) + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + + // Quick return if possible + if m == 0 || n == 0 || (alpha == 0 && beta == 1) { + return + } + + var kx, ky int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + if incY > 0 { + ky = 0 + } else { + ky = -(lenY - 1) * incY + } + + // First form y := beta * y + if incY > 0 { + Implementation{}.Sscal(lenY, beta, y, incY) + } else { + Implementation{}.Sscal(lenY, beta, y, -incY) + } + + if alpha == 0 { + return + } + + // Form y := alpha * A * x + y + if tA == blas.NoTrans { + if incX == 1 && incY == 1 { + for i := 0; i < m; i++ { + y[i] += alpha * f32.DotUnitary(a[lda*i:lda*i+n], x) + } + return + } + iy := ky + for i := 0; i < m; i++ { + y[iy] += alpha * f32.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 
1, uintptr(kx), 0) + iy += incY + } + return + } + // Cases where a is transposed. + if incX == 1 && incY == 1 { + for i := 0; i < m; i++ { + tmp := alpha * x[i] + if tmp != 0 { + f32.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y) + } + } + return + } + ix := kx + for i := 0; i < m; i++ { + tmp := alpha * x[ix] + if tmp != 0 { + f32.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky)) + } + ix += incX + } +} + +// Sger performs the rank-one operation +// A += alpha * x * y^T +// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { + // Check inputs + if m < 0 { + panic("m < 0") + } + if n < 0 { + panic(negativeN) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if lda*(m-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + if lda < max(1, n) { + panic(badLdA) + } + + // Quick return if possible + if m == 0 || n == 0 || alpha == 0 { + return + } + + var ky, kx int + if incY > 0 { + ky = 0 + } else { + ky = -(n - 1) * incY + } + + if incX > 0 { + kx = 0 + } else { + kx = -(m - 1) * incX + } + + if incX == 1 && incY == 1 { + x = x[:m] + y = y[:n] + for i, xv := range x { + tmp := alpha * xv + if tmp != 0 { + atmp := a[i*lda : i*lda+n] + f32.AxpyUnitaryTo(atmp, tmp, y, atmp) + } + } + return + } + + ix := kx + for i := 0; i < m; i++ { + tmp := alpha * x[ix] + if tmp != 0 { + f32.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0) + } + ix += incX + } +} + +// Sgbmv computes +// y = alpha * A * x + beta * y if tA == blas.NoTrans +// y = alpha * A^T * x + beta * y if tA == 
blas.Trans or blas.ConjTrans +// where a is an m×n band matrix kL subdiagonals and kU super-diagonals, and +// m and n refer to the size of the full dense matrix it represents. +// x and y are vectors, and alpha and beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if kL < 0 { + panic(kLLT0) + } + if kL < 0 { + panic(kULT0) + } + if lda < kL+kU+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + // Set up indexes + lenX := m + lenY := n + if tA == blas.NoTrans { + lenX = n + lenY = m + } + if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { + panic(badY) + } + if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 { + panic(badLdA) + } + + // Quick return if possible + if m == 0 || n == 0 || (alpha == 0 && beta == 1) { + return + } + + var kx, ky int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + if incY > 0 { + ky = 0 + } else { + ky = -(lenY - 1) * incY + } + + // First form y := beta * y + if incY > 0 { + Implementation{}.Sscal(lenY, beta, y, incY) + } else { + Implementation{}.Sscal(lenY, beta, y, -incY) + } + + if alpha == 0 { + return + } + + // i and j are indices of the compacted banded matrix. 
+ // off is the offset into the dense matrix (off + j = densej) + ld := min(m, n) + nCol := kU + 1 + kL + if tA == blas.NoTrans { + iy := ky + if incX == 1 { + for i := 0; i < m; i++ { + l := max(0, kL-i) + u := min(nCol, ld+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + xtmp := x[off : off+u-l] + var sum float32 + for j, v := range atmp { + sum += xtmp[j] * v + } + y[iy] += sum * alpha + iy += incY + } + return + } + for i := 0; i < m; i++ { + l := max(0, kL-i) + u := min(nCol, ld+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + jx := kx + var sum float32 + for _, v := range atmp { + sum += x[off*incX+jx] * v + jx += incX + } + y[iy] += sum * alpha + iy += incY + } + return + } + if incX == 1 { + for i := 0; i < m; i++ { + l := max(0, kL-i) + u := min(nCol, ld+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + tmp := alpha * x[i] + jy := ky + for _, v := range atmp { + y[jy+off*incY] += tmp * v + jy += incY + } + } + return + } + ix := kx + for i := 0; i < m; i++ { + l := max(0, kL-i) + u := min(nCol, ld+kL-i) + off := max(0, i-kL) + atmp := a[i*lda+l : i*lda+u] + tmp := alpha * x[ix] + jy := ky + for _, v := range atmp { + y[jy+off*incY] += tmp * v + jy += incY + } + ix += incX + } +} + +// Strmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// A is an n×n Triangular matrix and x is a vector. +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if lda < n { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + if n == 0 { + return + } + nonUnit := d != blas.Unit + if n == 1 { + if nonUnit { + x[0] *= a[0] + } + return + } + var kx int + if incX <= 0 { + kx = -(n - 1) * incX + } + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + ilda := i * lda + var tmp float32 + if nonUnit { + tmp = a[ilda+i] * x[i] + } else { + tmp = x[i] + } + xtmp := x[i+1:] + x[i] = tmp + f32.DotUnitary(a[ilda+i+1:ilda+n], xtmp) + } + return + } + ix := kx + for i := 0; i < n; i++ { + ilda := i * lda + var tmp float32 + if nonUnit { + tmp = a[ilda+i] * x[ix] + } else { + tmp = x[ix] + } + x[ix] = tmp + f32.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + ilda := i * lda + var tmp float32 + if nonUnit { + tmp += a[ilda+i] * x[i] + } else { + tmp = x[i] + } + x[i] = tmp + f32.DotUnitary(a[ilda:ilda+i], x) + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + ilda := i * lda + var tmp float32 + if nonUnit { + tmp = a[ilda+i] * x[ix] + } else { + tmp = x[ix] + } + x[ix] = tmp + f32.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) + ix -= incX + } + return + } + // Cases where a is transposed. 
+ if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + ilda := i * lda + xi := x[i] + f32.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) + if nonUnit { + x[i] *= a[ilda+i] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + ilda := i * lda + xi := x[ix] + f32.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) + if nonUnit { + x[ix] *= a[ilda+i] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + ilda := i * lda + xi := x[i] + f32.AxpyUnitary(xi, a[ilda:ilda+i], x) + if nonUnit { + x[i] *= a[i*lda+i] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + ilda := i * lda + xi := x[ix] + f32.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) + if nonUnit { + x[ix] *= a[ilda+i] + } + ix += incX + } +} + +// Strsv solves +// A * x = b if tA == blas.NoTrans +// A^T * x = b if tA == blas.Trans or blas.ConjTrans +// A is an n×n triangular matrix and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { + // Test the input parameters + // Verify inputs + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + // Quick return if possible + if n == 0 { + return + } + if n == 1 { + if d == blas.NonUnit { + x[0] /= a[0] + } + return + } + + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } + nonUnit := d == blas.NonUnit + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + var sum float32 + atmp := a[i*lda+i+1 : i*lda+n] + for j, v := range atmp { + jv := i + j + 1 + sum += x[jv] * v + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+i] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + var sum float32 + jx := ix + incX + atmp := a[i*lda+i+1 : i*lda+n] + for _, v := range atmp { + sum += x[jx] * v + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+i] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + var sum float32 + atmp := a[i*lda : i*lda+i] + for j, v := range atmp { + sum += x[j] * v + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+i] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + var sum float32 + atmp := a[i*lda : i*lda+i] + for _, v := range atmp { + sum += x[jx] * v + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+i] + } + ix += incX + } + return + } + // Cases where a is transposed. 
+ if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if nonUnit { + x[i] /= a[i*lda+i] + } + xi := x[i] + atmp := a[i*lda+i+1 : i*lda+n] + for j, v := range atmp { + jv := j + i + 1 + x[jv] -= v * xi + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + if nonUnit { + x[ix] /= a[i*lda+i] + } + xi := x[ix] + jx := kx + (i+1)*incX + atmp := a[i*lda+i+1 : i*lda+n] + for _, v := range atmp { + x[jx] -= v * xi + jx += incX + } + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[i] /= a[i*lda+i] + } + xi := x[i] + atmp := a[i*lda : i*lda+i] + for j, v := range atmp { + x[j] -= v * xi + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[ix] /= a[i*lda+i] + } + xi := x[ix] + jx := kx + atmp := a[i*lda : i*lda+i] + for _, v := range atmp { + x[jx] -= v * xi + jx += incX + } + ix -= incX + } +} + +// Ssymv computes +// y = alpha * A * x + beta * y, +// where a is an n×n symmetric matrix, x and y are vectors, and alpha and +// beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + // Check inputs + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(negativeN) + } + if lda > 1 && lda < n { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + // Quick return if possible + if n == 0 || (alpha == 0 && beta == 1) { + return + } + + // Set up start points + var kx, ky int + if incX > 0 { + kx = 0 + } else { + kx = -(n - 1) * incX + } + if incY > 0 { + ky = 0 + } else { + ky = -(n - 1) * incY + } + + // Form y = beta * y + if beta != 1 { + if incY > 0 { + Implementation{}.Sscal(n, beta, y, incY) + } else { + Implementation{}.Sscal(n, beta, y, -incY) + } + } + + if alpha == 0 { + return + } + + if n == 1 { + y[0] += alpha * a[0] * x[0] + return + } + + if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + sum := x[i] * a[i*lda+i] + jy := ky + (i+1)*incY + atmp := a[i*lda+i+1 : i*lda+n] + for j, v := range atmp { + jp := j + i + 1 + sum += x[jp] * v + y[jy] += xv * v + jy += incY + } + y[iy] += alpha * sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + sum := x[ix] * a[i*lda+i] + jx := kx + (i+1)*incX + jy := ky + (i+1)*incY + atmp := a[i*lda+i+1 : i*lda+n] + for _, v := range atmp { + sum += x[jx] * v + y[jy] += xv * v + jx += incX + jy += incY + } + y[iy] += alpha * sum + ix += incX + iy += incY + } + return + } + // Cases where a is lower triangular. 
+ if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + jy := ky + xv := alpha * x[i] + atmp := a[i*lda : i*lda+i] + var sum float32 + for j, v := range atmp { + sum += x[j] * v + y[jy] += xv * v + jy += incY + } + sum += x[i] * a[i*lda+i] + sum *= alpha + y[iy] += sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + xv := alpha * x[ix] + atmp := a[i*lda : i*lda+i] + var sum float32 + for _, v := range atmp { + sum += x[jx] * v + y[jy] += xv * v + jx += incX + jy += incY + } + sum += x[ix] * a[i*lda+i] + sum *= alpha + y[iy] += sum + ix += incX + iy += incY + } +} + +// Stbmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// where A is an n×n triangular banded matrix with k diagonals, and x is a vector. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if n == 0 { + return + } + var kx int + if incX <= 0 { + kx = -(n - 1) * incX + } else if incX != 1 { + kx = 0 + } + + nonunit := d != blas.Unit + + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + u := min(1+k, n-i) + var sum float32 + atmp := a[i*lda:] + xtmp := x[i:] + for j := 1; j < u; j++ { + sum += xtmp[j] * atmp[j] + } + if nonunit { + sum += xtmp[0] * atmp[0] + } else { + sum += xtmp[0] + } + x[i] = sum + } + return + } + ix 
:= kx + for i := 0; i < n; i++ { + u := min(1+k, n-i) + var sum float32 + atmp := a[i*lda:] + jx := incX + for j := 1; j < u; j++ { + sum += x[ix+jx] * atmp[j] + jx += incX + } + if nonunit { + sum += x[ix] * atmp[0] + } else { + sum += x[ix] + } + x[ix] = sum + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + l := max(0, k-i) + atmp := a[i*lda:] + var sum float32 + for j := l; j < k; j++ { + sum += x[i-k+j] * atmp[j] + } + if nonunit { + sum += x[i] * atmp[k] + } else { + sum += x[i] + } + x[i] = sum + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + l := max(0, k-i) + atmp := a[i*lda:] + var sum float32 + jx := l * incX + for j := l; j < k; j++ { + sum += x[ix-k*incX+jx] * atmp[j] + jx += incX + } + if nonunit { + sum += x[ix] * atmp[k] + } else { + sum += x[ix] + } + x[ix] = sum + ix -= incX + } + return + } + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + u := k + 1 + if i < u { + u = i + 1 + } + var sum float32 + for j := 1; j < u; j++ { + sum += x[i-j] * a[(i-j)*lda+j] + } + if nonunit { + sum += x[i] * a[i*lda] + } else { + sum += x[i] + } + x[i] = sum + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + u := k + 1 + if i < u { + u = i + 1 + } + var sum float32 + jx := incX + for j := 1; j < u; j++ { + sum += x[ix-jx] * a[(i-j)*lda+j] + jx += incX + } + if nonunit { + sum += x[ix] * a[i*lda] + } else { + sum += x[ix] + } + x[ix] = sum + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + u := k + if i+k >= n { + u = n - i - 1 + } + var sum float32 + for j := 0; j < u; j++ { + sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] + } + if nonunit { + sum += x[i] * a[i*lda+k] + } else { + sum += x[i] + } + x[i] = sum + } + return + } + ix := kx + for i := 0; i < n; i++ { + u := k + if i+k >= n { + u = n - i - 1 + } + var ( + sum float32 + jx int + ) + for j := 0; j < u; j++ { + sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] + jx += incX + } + if nonunit { 
+ sum += x[ix] * a[i*lda+k] + } else { + sum += x[ix] + } + x[ix] = sum + ix += incX + } +} + +// Stpmv computes +// x = A * x if tA == blas.NoTrans +// x = A^T * x if tA == blas.Trans or blas.ConjTrans +// where A is an n×n unit triangular matrix in packed format, and x is a vector. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { + // Verify inputs + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if len(ap) < (n*(n+1))/2 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if n == 0 { + return + } + var kx int + if incX <= 0 { + kx = -(n - 1) * incX + } + + nonUnit := d == blas.NonUnit + var offset int // Offset is the index of (i,i) + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + if nonUnit { + xi *= ap[offset] + } + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xi += v * xtmp[j] + } + x[i] = xi + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + xix := x[ix] + if nonUnit { + xix *= ap[offset] + } + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + for _, v := range atmp { + xix += v * x[jx] + jx += incX + } + x[ix] = xix + offset += n - i + ix += incX + } + return + } + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xi := x[i] + if nonUnit { + xi *= ap[offset] + } + atmp := ap[offset-i : offset] + for j, v := range atmp { + xi += v * x[j] + } + x[i] = xi + offset -= i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 
1; i >= 0; i-- { + xix := x[ix] + if nonUnit { + xix *= ap[offset] + } + atmp := ap[offset-i : offset] + jx := kx + for _, v := range atmp { + xix += v * x[jx] + jx += incX + } + x[ix] = xix + offset -= i + 1 + ix -= incX + } + return + } + // Cases where ap is transposed. + if ul == blas.Upper { + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xi := x[i] + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xtmp[j] += v * xi + } + if nonUnit { + x[i] *= ap[offset] + } + offset -= n - i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + xix := x[ix] + jx := kx + (i+1)*incX + atmp := ap[offset+1 : offset+n-i] + for _, v := range atmp { + x[jx] += v * xix + jx += incX + } + if nonUnit { + x[ix] *= ap[offset] + } + offset -= n - i + 1 + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + xi := x[i] + atmp := ap[offset-i : offset] + for j, v := range atmp { + x[j] += v * xi + } + if nonUnit { + x[i] *= ap[offset] + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + xix := x[ix] + jx := kx + atmp := ap[offset-i : offset] + for _, v := range atmp { + x[jx] += v * xix + jx += incX + } + if nonUnit { + x[ix] *= ap[offset] + } + ix += incX + offset += i + 2 + } +} + +// Stbsv solves +// A * x = b +// where A is an n×n triangular banded matrix with k diagonals in packed format, +// and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if n == 0 { + return + } + var kx int + if incX < 0 { + kx = -(n - 1) * incX + } else { + kx = 0 + } + nonUnit := d == blas.NonUnit + // Form x = A^-1 x. + // Several cases below use subslices for speed improvement. + // The incX != 1 cases usually do not because incX may be negative. + if tA == blas.NoTrans { + if ul == blas.Upper { + if incX == 1 { + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + atmp := a[i*lda+1:] + xtmp := x[i+1 : i+bands+1] + var sum float32 + for j, v := range xtmp { + sum += v * atmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + max := k + 1 + if i+max > n { + max = n - i + } + atmp := a[i*lda:] + var ( + jx int + sum float32 + ) + for j := 1; j < max; j++ { + jx += incX + sum += x[ix+jx] * atmp[j] + } + x[ix] -= sum + if nonUnit { + x[ix] /= atmp[0] + } + ix -= incX + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + atmp := a[i*lda+k-bands:] + xtmp := x[i-bands : i] + var sum float32 + for j, v := range xtmp { + sum += v * atmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= atmp[bands] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + atmp := a[i*lda+k-bands:] + var ( + sum float32 + jx int + ) + for j := 0; j < bands; j++ { 
+ sum += x[ix-bands*incX+jx] * atmp[j] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= atmp[bands] + } + ix += incX + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + var sum float32 + for j := 0; j < bands; j++ { + sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda] + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + bands := k + if i-k < 0 { + bands = i + } + var ( + sum float32 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda] + } + ix += incX + } + return + } + if incX == 1 { + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + var sum float32 + xtmp := x[i+1 : i+1+bands] + for j, v := range xtmp { + sum += v * a[(i+j+1)*lda+k-j-1] + } + x[i] -= sum + if nonUnit { + x[i] /= a[i*lda+k] + } + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- { + bands := k + if i+bands >= n { + bands = n - i - 1 + } + var ( + sum float32 + jx int + ) + for j := 0; j < bands; j++ { + sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= a[i*lda+k] + } + ix -= incX + } +} + +// Ssbmv performs +// y = alpha * A * x + beta * y +// where A is an n×n symmetric banded matrix, x and y are vectors, and alpha +// and beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if lda*(n-1)+k+1 > len(a) || lda < k+1 { + panic(badLdA) + } + + // Quick return if possible + if n == 0 || (alpha == 0 && beta == 1) { + return + } + + // Set up indexes + lenX := n + lenY := n + var kx, ky int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + if incY > 0 { + ky = 0 + } else { + ky = -(lenY - 1) * incY + } + + // First form y := beta * y + if incY > 0 { + Implementation{}.Sscal(lenY, beta, y, incY) + } else { + Implementation{}.Sscal(lenY, beta, y, -incY) + } + + if alpha == 0 { + return + } + + if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + atmp := a[i*lda:] + tmp := alpha * x[i] + sum := tmp * atmp[0] + u := min(k, n-i-1) + jy := incY + for j := 1; j <= u; j++ { + v := atmp[j] + sum += alpha * x[i+j] * v + y[iy+jy] += tmp * v + jy += incY + } + y[iy] += sum + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + atmp := a[i*lda:] + tmp := alpha * x[ix] + sum := tmp * atmp[0] + u := min(k, n-i-1) + jx := incX + jy := incY + for j := 1; j <= u; j++ { + v := atmp[j] + sum += alpha * x[ix+jx] * v + y[iy+jy] += tmp * v + jx += incX + jy += incY + } + y[iy] += sum + ix += incX + iy += incY + } + return + } + + // Cases where a has bands below the diagonal. 
+ if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + l := max(0, k-i) + tmp := alpha * x[i] + jy := l * incY + atmp := a[i*lda:] + for j := l; j < k; j++ { + v := atmp[j] + y[iy] += alpha * v * x[i-k+j] + y[iy-k*incY+jy] += tmp * v + jy += incY + } + y[iy] += tmp * atmp[k] + iy += incY + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + l := max(0, k-i) + tmp := alpha * x[ix] + jx := l * incX + jy := l * incY + atmp := a[i*lda:] + for j := l; j < k; j++ { + v := atmp[j] + y[iy] += alpha * v * x[ix-k*incX+jx] + y[iy-k*incY+jy] += tmp * v + jx += incX + jy += incY + } + y[iy] += tmp * atmp[k] + ix += incX + iy += incY + } + return +} + +// Ssyr performs the rank-one update +// a += alpha * x * x^T +// where a is an n×n symmetric matrix, and x is a vector. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + if alpha == 0 || n == 0 { + return + } + + lenX := n + var kx int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + tmp := x[i] * alpha + if tmp != 0 { + atmp := a[i*lda+i : i*lda+n] + xtmp := x[i:n] + for j, v := range xtmp { + atmp[j] += v * tmp + } + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + tmp := x[ix] * alpha + if tmp != 0 { + jx := ix + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += x[jx] * tmp + jx += incX + } + } + ix += incX + } + return + } + // Cases where a is lower triangular. 
+ if incX == 1 { + for i := 0; i < n; i++ { + tmp := x[i] * alpha + if tmp != 0 { + atmp := a[i*lda:] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += tmp * v + } + } + } + return + } + ix := kx + for i := 0; i < n; i++ { + tmp := x[ix] * alpha + if tmp != 0 { + atmp := a[i*lda:] + jx := kx + for j := 0; j < i+1; j++ { + atmp[j] += tmp * x[jx] + jx += incX + } + } + ix += incX + } +} + +// Ssyr2 performs the symmetric rank-two update +// A += alpha * x * y^T + alpha * y * x^T +// where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if lda*(n-1)+n > len(a) || lda < max(1, n) { + panic(badLdA) + } + if alpha == 0 { + return + } + + var ky, kx int + if incY > 0 { + ky = 0 + } else { + ky = -(n - 1) * incY + } + if incX > 0 { + kx = 0 + } else { + kx = -(n - 1) * incX + } + if ul == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + xi := x[i] + yi := y[i] + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += alpha * (xi*y[j] + x[j]*yi) + } + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + i*incX + jy := ky + i*incY + xi := x[ix] + yi := y[iy] + atmp := a[i*lda:] + for j := i; j < n; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + } + return + } + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + xi := x[i] + yi := y[i] + atmp := a[i*lda:] + 
for j := 0; j <= i; j++ { + atmp[j] += alpha * (xi*y[j] + x[j]*yi) + } + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + xi := x[ix] + yi := y[iy] + atmp := a[i*lda:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + } + return +} + +// Stpsv solves +// A * x = b if tA == blas.NoTrans +// A^T * x = b if tA == blas.Trans or blas.ConjTrans +// where A is an n×n triangular matrix in packed format and x is a vector. +// At entry to the function, x contains the values of b, and the result is +// stored in place into x. +// +// No test for singularity or near-singularity is included in this +// routine. Such tests must be performed before calling this routine. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { + // Verify inputs + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if n < 0 { + panic(nLT0) + } + if len(ap) < (n*(n+1))/2 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if n == 0 { + return + } + var kx int + if incX <= 0 { + kx = -(n - 1) * incX + } + + nonUnit := d == blas.NonUnit + var offset int // Offset is the index of (i,i) + if tA == blas.NoTrans { + if ul == blas.Upper { + offset = n*(n+1)/2 - 1 + if incX == 1 { + for i := n - 1; i >= 0; i-- { + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + var sum float32 + for j, v := range atmp { + sum += v * xtmp[j] + } + x[i] -= sum + if nonUnit { + x[i] /= ap[offset] + } + offset -= n - i + 1 + } + return + } + ix := kx + (n-1)*incX + for i := n - 1; i >= 0; i-- 
{ + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + var sum float32 + for _, v := range atmp { + sum += v * x[jx] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= ap[offset] + } + ix -= incX + offset -= n - i + 1 + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i : offset] + var sum float32 + for j, v := range atmp { + sum += v * x[j] + } + x[i] -= sum + if nonUnit { + x[i] /= ap[offset] + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + atmp := ap[offset-i : offset] + var sum float32 + for _, v := range atmp { + sum += v * x[jx] + jx += incX + } + x[ix] -= sum + if nonUnit { + x[ix] /= ap[offset] + } + ix += incX + offset += i + 2 + } + return + } + // Cases where ap is transposed. + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + if nonUnit { + x[i] /= ap[offset] + } + xi := x[i] + atmp := ap[offset+1 : offset+n-i] + xtmp := x[i+1:] + for j, v := range atmp { + xtmp[j] -= v * xi + } + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + if nonUnit { + x[ix] /= ap[offset] + } + xix := x[ix] + atmp := ap[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + for _, v := range atmp { + x[jx] -= v * xix + jx += incX + } + ix += incX + offset += n - i + } + return + } + if incX == 1 { + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[i] /= ap[offset] + } + xi := x[i] + atmp := ap[offset-i : offset] + for j, v := range atmp { + x[j] -= v * xi + } + offset -= i + 1 + } + return + } + ix := kx + (n-1)*incX + offset = n*(n+1)/2 - 1 + for i := n - 1; i >= 0; i-- { + if nonUnit { + x[ix] /= ap[offset] + } + xix := x[ix] + atmp := ap[offset-i : offset] + jx := kx + for _, v := range atmp { + x[jx] -= v * xix + jx += incX + } + ix -= incX + offset -= i + 1 + } +} + +// Sspmv performs +// y = alpha * A * x + beta * y, +// where A is an n×n symmetric matrix in packed format, x and y are vectors +// and alpha and beta are 
scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, a []float32, x []float32, incX int, beta float32, y []float32, incY int) { + // Verify inputs + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if len(a) < (n*(n+1))/2 { + panic(badLdA) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + // Quick return if possible + if n == 0 || (alpha == 0 && beta == 1) { + return + } + + // Set up start points + var kx, ky int + if incX > 0 { + kx = 0 + } else { + kx = -(n - 1) * incX + } + if incY > 0 { + ky = 0 + } else { + ky = -(n - 1) * incY + } + + // Form y = beta * y + if beta != 1 { + if incY > 0 { + Implementation{}.Sscal(n, beta, y, incY) + } else { + Implementation{}.Sscal(n, beta, y, -incY) + } + } + + if alpha == 0 { + return + } + + if n == 1 { + y[0] += alpha * a[0] * x[0] + return + } + var offset int // Offset is the index of (i,i). 
+ if ul == blas.Upper { + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + sum := a[offset] * x[i] + atmp := a[offset+1 : offset+n-i] + xtmp := x[i+1:] + jy := ky + (i+1)*incY + for j, v := range atmp { + sum += v * xtmp[j] + y[jy] += v * xv + jy += incY + } + y[iy] += alpha * sum + iy += incY + offset += n - i + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + sum := a[offset] * x[ix] + atmp := a[offset+1 : offset+n-i] + jx := kx + (i+1)*incX + jy := ky + (i+1)*incY + for _, v := range atmp { + sum += v * x[jx] + y[jy] += v * xv + jx += incX + jy += incY + } + y[iy] += alpha * sum + ix += incX + iy += incY + offset += n - i + } + return + } + if incX == 1 { + iy := ky + for i := 0; i < n; i++ { + xv := x[i] * alpha + atmp := a[offset-i : offset] + jy := ky + var sum float32 + for j, v := range atmp { + sum += v * x[j] + y[jy] += v * xv + jy += incY + } + sum += a[offset] * x[i] + y[iy] += alpha * sum + iy += incY + offset += i + 2 + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + xv := x[ix] * alpha + atmp := a[offset-i : offset] + jx := kx + jy := ky + var sum float32 + for _, v := range atmp { + sum += v * x[jx] + y[jy] += v * xv + jx += incX + jy += incY + } + + sum += a[offset] * x[ix] + y[iy] += alpha * sum + ix += incX + iy += incY + offset += i + 2 + } +} + +// Sspr computes the rank-one operation +// a += alpha * x * x^T +// where a is an n×n symmetric matrix in packed format, x is a vector, and +// alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if len(a) < (n*(n+1))/2 { + panic(badLdA) + } + if alpha == 0 || n == 0 { + return + } + lenX := n + var kx int + if incX > 0 { + kx = 0 + } else { + kx = -(lenX - 1) * incX + } + var offset int // Offset is the index of (i,i). + if ul == blas.Upper { + if incX == 1 { + for i := 0; i < n; i++ { + atmp := a[offset:] + xv := alpha * x[i] + xtmp := x[i:n] + for j, v := range xtmp { + atmp[j] += xv * v + } + offset += n - i + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + i*incX + atmp := a[offset:] + xv := alpha * x[ix] + for j := 0; j < n-i; j++ { + atmp[j] += xv * x[jx] + jx += incX + } + ix += incX + offset += n - i + } + return + } + if incX == 1 { + for i := 0; i < n; i++ { + atmp := a[offset-i:] + xv := alpha * x[i] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += xv * v + } + offset += i + 2 + } + return + } + ix := kx + for i := 0; i < n; i++ { + jx := kx + atmp := a[offset-i:] + xv := alpha * x[ix] + for j := 0; j <= i; j++ { + atmp[j] += xv * x[jx] + jx += incX + } + ix += incX + offset += i + 2 + } +} + +// Sspr2 performs the symmetric rank-2 update +// A += alpha * x * y^T + alpha * y * x^T, +// where A is an n×n symmetric matrix in packed format, x and y are vectors, +// and alpha is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if n < 0 { + panic(nLT0) + } + if incX == 0 { + panic(zeroIncX) + } + if incY == 0 { + panic(zeroIncY) + } + if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { + panic(badX) + } + if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { + panic(badY) + } + if len(ap) < (n*(n+1))/2 { + panic(badLdA) + } + if alpha == 0 { + return + } + var ky, kx int + if incY > 0 { + ky = 0 + } else { + ky = -(n - 1) * incY + } + if incX > 0 { + kx = 0 + } else { + kx = -(n - 1) * incX + } + var offset int // Offset is the index of (i,i). + if ul == blas.Upper { + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset:] + xi := x[i] + yi := y[i] + xtmp := x[i:n] + ytmp := y[i:n] + for j, v := range xtmp { + atmp[j] += alpha * (xi*ytmp[j] + v*yi) + } + offset += n - i + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + i*incX + jy := ky + i*incY + atmp := ap[offset:] + xi := x[ix] + yi := y[iy] + for j := 0; j < n-i; j++ { + atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) + jx += incX + jy += incY + } + ix += incX + iy += incY + offset += n - i + } + return + } + if incX == 1 && incY == 1 { + for i := 0; i < n; i++ { + atmp := ap[offset-i:] + xi := x[i] + yi := y[i] + xtmp := x[:i+1] + for j, v := range xtmp { + atmp[j] += alpha * (xi*y[j] + v*yi) + } + offset += i + 2 + } + return + } + ix := kx + iy := ky + for i := 0; i < n; i++ { + jx := kx + jy := ky + atmp := ap[offset-i:] + for j := 0; j <= i; j++ { + atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) + jx += incX + jy += incY + } + ix += incX + iy += incY + offset += i + 2 + } +} diff --git a/blas/native/level3double.go b/blas/native/level3double.go new file mode 100644 index 00000000..3f46d775 --- /dev/null +++ b/blas/native/level3double.go @@ -0,0 
+1,831 @@ +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "github.com/gonum/blas" + "github.com/gonum/internal/asm/f64" +) + +var _ blas.Float64Level3 = Implementation{} + +// Dtrsm solves +// A * X = alpha * B, if tA == blas.NoTrans and side == blas.Left, +// A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// X * A = alpha * B, if tA == blas.NoTrans and side == blas.Right, +// X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an m×m triangular matrix if side == blas.Left and an n×n triangular +// matrix if side == blas.Right, X is an m×n matrix, and alpha is a scalar. +// +// At entry to the function, X contains the values of B, and the result is +// stored in place into X. +// +// No check is made that A is invertible. +func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { + if s != blas.Left && s != blas.Right { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if ldb < n { + panic(badLdB) + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic(badLdA) + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic(badLdB) + } + + if m == 0 || n == 0 { + return + } + + if alpha == 0 { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] = 0 + } + } + return + } + nonUnit := d == blas.NonUnit + if s == blas.Left { + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := m - 1; i >= 0; i-- { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j 
:= range btmp { + btmp[j] *= alpha + } + } + for ka, va := range a[i*lda+i+1 : i*lda+m] { + k := ka + i + 1 + if va != 0 { + f64.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + for j := 0; j < n; j++ { + btmp[j] *= tmp + } + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j := 0; j < n; j++ { + btmp[j] *= alpha + } + } + for k, va := range a[i*lda : i*lda+i] { + if va != 0 { + f64.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + for j := 0; j < n; j++ { + btmp[j] *= tmp + } + } + } + return + } + // Cases where a is transposed + if ul == blas.Upper { + for k := 0; k < m; k++ { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + for j := 0; j < n; j++ { + btmpk[j] *= tmp + } + } + for ia, va := range a[k*lda+k+1 : k*lda+m] { + i := ia + k + 1 + if va != 0 { + btmp := b[i*ldb : i*ldb+n] + f64.AxpyUnitaryTo(btmp, -va, btmpk, btmp) + } + } + if alpha != 1 { + for j := 0; j < n; j++ { + btmpk[j] *= alpha + } + } + } + return + } + for k := m - 1; k >= 0; k-- { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + for j := 0; j < n; j++ { + btmpk[j] *= tmp + } + } + for i, va := range a[k*lda : k*lda+k] { + if va != 0 { + btmp := b[i*ldb : i*ldb+n] + f64.AxpyUnitaryTo(btmp, -va, btmpk, btmp) + } + } + if alpha != 1 { + for j := 0; j < n; j++ { + btmpk[j] *= alpha + } + } + } + return + } + // Cases where a is to the right of X. 
+ if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j := 0; j < n; j++ { + btmp[j] *= alpha + } + } + for k, vb := range btmp { + if vb != 0 { + if btmp[k] != 0 { + if nonUnit { + btmp[k] /= a[k*lda+k] + } + btmpk := btmp[k+1 : n] + f64.AxpyUnitaryTo(btmpk, -btmp[k], a[k*lda+k+1:k*lda+n], btmpk) + } + } + } + } + return + } + // Rows of b are always addressed with b's own stride ldb, never lda. + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j := 0; j < n; j++ { + btmp[j] *= alpha + } + } + for k := n - 1; k >= 0; k-- { + if btmp[k] != 0 { + if nonUnit { + btmp[k] /= a[k*lda+k] + } + f64.AxpyUnitaryTo(btmp, -btmp[k], a[k*lda:k*lda+k], btmp) + } + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:]) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := 0; j < n; j++ { + tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda:j*lda+j], btmp) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } +} + +// Dsymm performs one of +// C = alpha * A * B + beta * C, if side == blas.Left, +// C = alpha * B * A + beta * C, if side == blas.Right, +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha +// is a scalar. 
+func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + // Use the shared badSide panic value, as the other level 3 routines do. + if s != blas.Right && s != blas.Left { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic(badLdA) + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic(badLdB) + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic(badLdC) + } + if m == 0 || n == 0 { + return + } + if alpha == 0 && beta == 1 { + return + } + if alpha == 0 { + if beta == 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := 0; j < n; j++ { + ctmp[j] *= beta + } + } + return + } + + isUpper := ul == blas.Upper + if s == blas.Left { + for i := 0; i < m; i++ { + atmp := alpha * a[i*lda+i] + btmp := b[i*ldb : i*ldb+n] + ctmp := c[i*ldc : i*ldc+n] + for j, v := range btmp { + ctmp[j] *= beta + ctmp[j] += atmp * v + } + + for k := 0; k < i; k++ { + var atmp float64 + if isUpper { + atmp = a[k*lda+i] + } else { + atmp = a[i*lda+k] + } + atmp *= alpha + ctmp := c[i*ldc : i*ldc+n] + f64.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp) + } + for k := i + 1; k < m; k++ { + var atmp float64 + if isUpper { + atmp = a[i*lda+k] + } else { + atmp = a[k*lda+i] + } + atmp *= alpha + ctmp := c[i*ldc : i*ldc+n] + f64.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp) + } + } + return + } + if isUpper { + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + tmp := alpha * b[i*ldb+j] + var tmp2 float64 + atmp := a[j*lda+j+1 : j*lda+n] + btmp := b[i*ldb+j+1 : i*ldb+n] + ctmp := c[i*ldc+j+1 : i*ldc+n] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= 
beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } + return + } + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + tmp := alpha * b[i*ldb+j] + var tmp2 float64 + atmp := a[j*lda : j*lda+j] + btmp := b[i*ldb : i*ldb+j] + ctmp := c[i*ldc : i*ldc+j] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } +} + +// Dsyrk performs the symmetric rank-k operation +// C = alpha * A * A^T + beta*C +// C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and +// a k×n matrix otherwise. alpha and beta are scalars. +func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if ldc < n { + panic(badLdC) + } + var row, col int + if tA == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic(badLdA) + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic(badLdC) + } + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + atmp := a[i*lda : i*lda+k] + for jc, vc := range ctmp { + j 
:= jc + i + ctmp[jc] = vc*beta + alpha*f64.DotUnitary(atmp, a[j*lda:j*lda+k]) + } + } + return + } + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + for j, vc := range c[i*ldc : i*ldc+i+1] { + c[i*ldc+j] = vc*beta + alpha*f64.DotUnitary(a[j*lda:j*lda+k], atmp) + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f64.AxpyUnitaryTo(ctmp, tmp, a[l*lda+i:l*lda+n], ctmp) + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + // Only beta == 1 may skip the scaling; beta == 0 must still be applied + // so the previous contents of c do not leak into the result. + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f64.AxpyUnitaryTo(ctmp, tmp, a[l*lda:l*lda+i+1], ctmp) + } + } + } +} + +// Dsyr2k performs the symmetric rank 2k operation +// C = alpha * A * B^T + alpha * B * A^T + beta * C +// where C is an n×n symmetric matrix. A and B are n×k matrices if +// tA == NoTrans and k×n otherwise. alpha and beta are scalars. 
+func (Implementation) Dsyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if ldc < n { + panic(badLdC) + } + var row, col int + if tA == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic(badLdA) + } + if ldb*(row-1)+col > len(b) || ldb < max(1, col) { + panic(badLdB) + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic(badLdC) + } + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc+i : i*ldc+n] + for jc := range ctmp { + j := i + jc + var tmp1, tmp2 float64 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[jc] *= beta + ctmp[jc] += alpha * (tmp1 + tmp2) + } + } + return + } + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc : i*ldc+i+1] + for j := 0; j <= i; j++ { + var tmp1, tmp2 float64 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 
+= atmp[l] * binner[l] + } + ctmp[j] *= beta + ctmp[j] += alpha * (tmp1 + tmp2) + } + } + return + } + // Cases where a and b are transposed. b is indexed with its own stride ldb. + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*ldb+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb+i : l*ldb+n] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda+i : l*lda+n] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*ldb+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb : l*ldb+i+1] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda : l*lda+i+1] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } +} + +// Dtrmm performs +// B = alpha * A * B, if tA == blas.NoTrans and side == blas.Left, +// B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// B = alpha * B * A, if tA == blas.NoTrans and side == blas.Right, +// B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an n×n or m×m triangular matrix, and B is an m×n matrix. 
func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
	// Argument validation; each failure panics with a package message constant.
	if s != blas.Left && s != blas.Right {
		panic(badSide)
	}
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	// k is the order of the triangular matrix a: m when a multiplies from the
	// left, n when it multiplies from the right.
	var k int
	if s == blas.Left {
		k = m
	} else {
		k = n
	}
	if lda*(k-1)+k > len(a) || lda < max(1, k) {
		panic(badLdA)
	}
	if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
		panic(badLdB)
	}
	// alpha == 0: the result is the zero matrix and a is never read.
	if alpha == 0 {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j := range btmp {
				btmp[j] = 0
			}
		}
		return
	}

	// When d is blas.Unit the stored diagonal of a is never read.
	nonUnit := d == blas.NonUnit
	if s == blas.Left {
		if tA == blas.NoTrans {
			if ul == blas.Upper {
				// Row i of the result combines rows k >= i of b. Walking i
				// upwards means the rows k > i read here are still unmodified.
				for i := 0; i < m; i++ {
					tmp := alpha
					if nonUnit {
						tmp *= a[i*lda+i]
					}
					btmp := b[i*ldb : i*ldb+n]
					for j := range btmp {
						btmp[j] *= tmp
					}
					for ka, va := range a[i*lda+i+1 : i*lda+m] {
						k := ka + i + 1
						tmp := alpha * va
						if tmp != 0 {
							f64.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
						}
					}
				}
				return
			}
			// Lower triangle: row i combines rows k <= i, so walk i downwards
			// to keep the rows k < i unmodified until they are read.
			for i := m - 1; i >= 0; i-- {
				tmp := alpha
				if nonUnit {
					tmp *= a[i*lda+i]
				}
				btmp := b[i*ldb : i*ldb+n]
				for j := range btmp {
					btmp[j] *= tmp
				}
				for k, va := range a[i*lda : i*lda+i] {
					tmp := alpha * va
					if tmp != 0 {
						f64.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
					}
				}
			}
			return
		}
		// Cases where a is transposed.
		if ul == blas.Upper {
			// Row k of b is scattered into rows i > k using row k of a, and
			// only afterwards scaled by its own diagonal term.
			for k := m - 1; k >= 0; k-- {
				btmpk := b[k*ldb : k*ldb+n]
				for ia, va := range a[k*lda+k+1 : k*lda+m] {
					i := ia + k + 1
					btmp := b[i*ldb : i*ldb+n]
					tmp := alpha * va
					if tmp != 0 {
						f64.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
					}
				}
				tmp := alpha
				if nonUnit {
					tmp *= a[k*lda+k]
				}
				if tmp != 1 {
					for j := 0; j < n; j++ {
						btmpk[j] *= tmp
					}
				}
			}
			return
		}
		// Lower triangle, transposed: row k is scattered into rows i < k.
		for k := 0; k < m; k++ {
			btmpk := b[k*ldb : k*ldb+n]
			for i, va := range a[k*lda : k*lda+k] {
				btmp := b[i*ldb : i*ldb+n]
				tmp := alpha * va
				if tmp != 0 {
					f64.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
				}
			}
			tmp := alpha
			if nonUnit {
				tmp *= a[k*lda+k]
			}
			if tmp != 1 {
				for j := 0; j < n; j++ {
					btmpk[j] *= tmp
				}
			}
		}
		return
	}
	// Cases where a is on the right. Every row of b is updated independently.
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			for i := 0; i < m; i++ {
				btmp := b[i*ldb : i*ldb+n]
				// Element k feeds elements j > k, so k runs from high to low:
				// the targets have already received their own diagonal term.
				for k := n - 1; k >= 0; k-- {
					tmp := alpha * btmp[k]
					if tmp != 0 {
						btmp[k] = tmp
						if nonUnit {
							btmp[k] *= a[k*lda+k]
						}
						for ja, v := range a[k*lda+k+1 : k*lda+n] {
							j := ja + k + 1
							btmp[j] += tmp * v
						}
					}
				}
			}
			return
		}
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			// Lower triangle: element k feeds elements j < k, so k runs upwards.
			for k := 0; k < n; k++ {
				tmp := alpha * btmp[k]
				if tmp != 0 {
					btmp[k] = tmp
					if nonUnit {
						btmp[k] *= a[k*lda+k]
					}
					f64.AxpyUnitaryTo(btmp, tmp, a[k*lda:k*lda+k], btmp)
				}
			}
		}
		return
	}
	// Cases where a is transposed.
	if ul == blas.Upper {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			// Element j is a dot product with elements j.. of the row, which
			// are still unmodified while j increases.
			for j, vb := range btmp {
				tmp := vb
				if nonUnit {
					tmp *= a[j*lda+j]
				}
				tmp += f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
				btmp[j] = alpha * tmp
			}
		}
		return
	}
	for i := 0; i < m; i++ {
		btmp := b[i*ldb : i*ldb+n]
		// Lower triangle: element j depends on elements ..j, so j decreases.
		for j := n - 1; j >= 0; j-- {
			tmp := btmp[j]
			if nonUnit {
				tmp *= a[j*lda+j]
			}
			tmp += f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
			btmp[j] = alpha * tmp
		}
	}
}

// The tests below (from level3double_test.go) each forward to the
// corresponding testblas helper, passing the package-level impl value.

func TestDgemm(t *testing.T) {
	testblas.TestDgemm(t, impl)
}

func TestDsymm(t *testing.T) {
	testblas.DsymmTest(t, impl)
}

func TestDtrsm(t *testing.T) {
	testblas.DtrsmTest(t, impl)
}

func TestDsyrk(t *testing.T) {
	testblas.DsyrkTest(t, impl)
}

func TestDsyr2k(t *testing.T) {
	testblas.Dsyr2kTest(t, impl)
}

func TestDtrmm(t *testing.T) {
	testblas.DtrmmTest(t, impl)
}

// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.

// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+ +package native + +import ( + "github.com/gonum/blas" + "github.com/gonum/internal/asm/f32" +) + +var _ blas.Float32Level3 = Implementation{} + +// Strsm solves +// A * X = alpha * B, if tA == blas.NoTrans side == blas.Left, +// A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// X * A = alpha * B, if tA == blas.NoTrans side == blas.Right, +// X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a +// scalar. +// +// At entry to the function, X contains the values of B, and the result is +// stored in place into X. +// +// No check is made that A is invertible. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) { + if s != blas.Left && s != blas.Right { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if ldb < n { + panic(badLdB) + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic(badLdA) + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic(badLdB) + } + + if m == 0 || n == 0 { + return + } + + if alpha == 0 { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] = 0 + } + } + return + } + nonUnit := d == blas.NonUnit + if s == blas.Left { + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := m - 1; i >= 0; i-- { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j := range btmp { + btmp[j] *= alpha + } + } + for ka, va := range a[i*lda+i+1 : i*lda+m] { + k := ka + 
i + 1 + if va != 0 { + f32.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + for j := 0; j < n; j++ { + btmp[j] *= tmp + } + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j := 0; j < n; j++ { + btmp[j] *= alpha + } + } + for k, va := range a[i*lda : i*lda+i] { + if va != 0 { + f32.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + for j := 0; j < n; j++ { + btmp[j] *= tmp + } + } + } + return + } + // Cases where a is transposed + if ul == blas.Upper { + for k := 0; k < m; k++ { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + for j := 0; j < n; j++ { + btmpk[j] *= tmp + } + } + for ia, va := range a[k*lda+k+1 : k*lda+m] { + i := ia + k + 1 + if va != 0 { + btmp := b[i*ldb : i*ldb+n] + f32.AxpyUnitaryTo(btmp, -va, btmpk, btmp) + } + } + if alpha != 1 { + for j := 0; j < n; j++ { + btmpk[j] *= alpha + } + } + } + return + } + for k := m - 1; k >= 0; k-- { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + for j := 0; j < n; j++ { + btmpk[j] *= tmp + } + } + for i, va := range a[k*lda : k*lda+k] { + if va != 0 { + btmp := b[i*ldb : i*ldb+n] + f32.AxpyUnitaryTo(btmp, -va, btmpk, btmp) + } + } + if alpha != 1 { + for j := 0; j < n; j++ { + btmpk[j] *= alpha + } + } + } + return + } + // Cases where a is to the right of X. 
+ if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j := 0; j < n; j++ { + btmp[j] *= alpha + } + } + for k, vb := range btmp { + if vb != 0 { + if btmp[k] != 0 { + if nonUnit { + btmp[k] /= a[k*lda+k] + } + btmpk := btmp[k+1 : n] + f32.AxpyUnitaryTo(btmpk, -btmp[k], a[k*lda+k+1:k*lda+n], btmpk) + } + } + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*lda : i*lda+n] + if alpha != 1 { + for j := 0; j < n; j++ { + btmp[j] *= alpha + } + } + for k := n - 1; k >= 0; k-- { + if btmp[k] != 0 { + if nonUnit { + btmp[k] /= a[k*lda+k] + } + f32.AxpyUnitaryTo(btmp, -btmp[k], a[k*lda:k*lda+k], btmp) + } + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*lda : i*lda+n] + for j := n - 1; j >= 0; j-- { + tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:]) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*lda : i*lda+n] + for j := 0; j < n; j++ { + tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda:j*lda+j], btmp) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } +} + +// Ssymm performs one of +// C = alpha * A * B + beta * C, if side == blas.Left, +// C = alpha * B * A + beta * C, if side == blas.Right, +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha +// is a scalar. +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { + if s != blas.Right && s != blas.Left { + panic("goblas: bad side") + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic(badLdA) + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic(badLdB) + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic(badLdC) + } + if m == 0 || n == 0 { + return + } + if alpha == 0 && beta == 1 { + return + } + if alpha == 0 { + if beta == 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := 0; j < n; j++ { + ctmp[j] *= beta + } + } + return + } + + isUpper := ul == blas.Upper + if s == blas.Left { + for i := 0; i < m; i++ { + atmp := alpha * a[i*lda+i] + btmp := b[i*ldb : i*ldb+n] + ctmp := c[i*ldc : i*ldc+n] + for j, v := range btmp { + ctmp[j] *= beta + ctmp[j] += atmp * v + } + + for k := 0; k < i; k++ { + var atmp float32 + if isUpper { + atmp = a[k*lda+i] + } else { + atmp = a[i*lda+k] + } + atmp *= alpha + ctmp := c[i*ldc : i*ldc+n] + f32.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp) + } + for k := i + 1; k < m; k++ { + var atmp float32 + if isUpper { + atmp = a[i*lda+k] + } else { + atmp = a[k*lda+i] + } + atmp *= alpha + ctmp := c[i*ldc : i*ldc+n] + f32.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp) + } + } + return + } + if isUpper { + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + tmp := alpha * b[i*ldb+j] + var tmp2 float32 + atmp := a[j*lda+j+1 : j*lda+n] + btmp := b[i*ldb+j+1 : i*ldb+n] + ctmp := c[i*ldc+j+1 : i*ldc+n] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= 
beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } + return + } + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + tmp := alpha * b[i*ldb+j] + var tmp2 float32 + atmp := a[j*lda : j*lda+j] + btmp := b[i*ldb : i*ldb+j] + ctmp := c[i*ldc : i*ldc+j] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } +} + +// Ssyrk performs the symmetric rank-k operation +// C = alpha * A * A^T + beta*C +// C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and +// a k×n matrix otherwise. alpha and beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Ssyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if ldc < n { + panic(badLdC) + } + var row, col int + if tA == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic(badLdA) + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic(badLdC) + } + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := 
c[i*ldc+i : i*ldc+n] + atmp := a[i*lda : i*lda+k] + for jc, vc := range ctmp { + j := jc + i + ctmp[jc] = vc*beta + alpha*f32.DotUnitary(atmp, a[j*lda:j*lda+k]) + } + } + return + } + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + for j, vc := range c[i*ldc : i*ldc+i+1] { + c[i*ldc+j] = vc*beta + alpha*f32.DotUnitary(a[j*lda:j*lda+k], atmp) + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f32.AxpyUnitaryTo(ctmp, tmp, a[l*lda+i:l*lda+n], ctmp) + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + if beta != 0 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f32.AxpyUnitaryTo(ctmp, tmp, a[l*lda:l*lda+i+1], ctmp) + } + } + } +} + +// Ssyr2k performs the symmetric rank 2k operation +// C = alpha * A * B^T + alpha * B * A^T + beta * C +// where C is an n×n symmetric matrix. A and B are n×k matrices if +// tA == NoTrans and k×n otherwise. alpha and beta are scalars. +// +// Float32 implementations are autogenerated and not directly tested. 
+func (Implementation) Ssyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if ldc < n { + panic(badLdC) + } + var row, col int + if tA == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic(badLdA) + } + if ldb*(row-1)+col > len(b) || ldb < max(1, col) { + panic(badLdB) + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic(badLdC) + } + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc+i : i*ldc+n] + for jc := range ctmp { + j := i + jc + var tmp1, tmp2 float32 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[jc] *= beta + ctmp[jc] += alpha * (tmp1 + tmp2) + } + } + return + } + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc : i*ldc+i+1] + for j := 0; j <= i; j++ { + var tmp1, tmp2 float32 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 
+= atmp[l] * binner[l] + } + ctmp[j] *= beta + ctmp[j] += alpha * (tmp1 + tmp2) + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*lda+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb+i : l*ldb+n] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda+i : l*lda+n] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*lda+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb : l*ldb+i+1] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda : l*lda+i+1] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } +} + +// Strmm performs +// B = alpha * A * B, if tA == blas.NoTrans and side == blas.Left, +// B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// B = alpha * B * A, if tA == blas.NoTrans and side == blas.Right, +// B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an n×n or m×m triangular matrix, and B is an m×n matrix. +// +// Float32 implementations are autogenerated and not directly tested. 
func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
	// Argument validation; each failure panics with a package message constant.
	if s != blas.Left && s != blas.Right {
		panic(badSide)
	}
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	// k is the order of the triangular matrix a: m when a multiplies from the
	// left, n when it multiplies from the right.
	var k int
	if s == blas.Left {
		k = m
	} else {
		k = n
	}
	if lda*(k-1)+k > len(a) || lda < max(1, k) {
		panic(badLdA)
	}
	if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
		panic(badLdB)
	}
	// alpha == 0: the result is the zero matrix and a is never read.
	if alpha == 0 {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j := range btmp {
				btmp[j] = 0
			}
		}
		return
	}

	// When d is blas.Unit the stored diagonal of a is never read.
	nonUnit := d == blas.NonUnit
	if s == blas.Left {
		if tA == blas.NoTrans {
			if ul == blas.Upper {
				// Row i of the result combines rows k >= i of b. Walking i
				// upwards means the rows k > i read here are still unmodified.
				for i := 0; i < m; i++ {
					tmp := alpha
					if nonUnit {
						tmp *= a[i*lda+i]
					}
					btmp := b[i*ldb : i*ldb+n]
					for j := range btmp {
						btmp[j] *= tmp
					}
					for ka, va := range a[i*lda+i+1 : i*lda+m] {
						k := ka + i + 1
						tmp := alpha * va
						if tmp != 0 {
							f32.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
						}
					}
				}
				return
			}
			// Lower triangle: row i combines rows k <= i, so walk i downwards
			// to keep the rows k < i unmodified until they are read.
			for i := m - 1; i >= 0; i-- {
				tmp := alpha
				if nonUnit {
					tmp *= a[i*lda+i]
				}
				btmp := b[i*ldb : i*ldb+n]
				for j := range btmp {
					btmp[j] *= tmp
				}
				for k, va := range a[i*lda : i*lda+i] {
					tmp := alpha * va
					if tmp != 0 {
						f32.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
					}
				}
			}
			return
		}
		// Cases where a is transposed.
		if ul == blas.Upper {
			// Row k of b is scattered into rows i > k using row k of a, and
			// only afterwards scaled by its own diagonal term.
			for k := m - 1; k >= 0; k-- {
				btmpk := b[k*ldb : k*ldb+n]
				for ia, va := range a[k*lda+k+1 : k*lda+m] {
					i := ia + k + 1
					btmp := b[i*ldb : i*ldb+n]
					tmp := alpha * va
					if tmp != 0 {
						f32.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
					}
				}
				tmp := alpha
				if nonUnit {
					tmp *= a[k*lda+k]
				}
				if tmp != 1 {
					for j := 0; j < n; j++ {
						btmpk[j] *= tmp
					}
				}
			}
			return
		}
		// Lower triangle, transposed: row k is scattered into rows i < k.
		for k := 0; k < m; k++ {
			btmpk := b[k*ldb : k*ldb+n]
			for i, va := range a[k*lda : k*lda+k] {
				btmp := b[i*ldb : i*ldb+n]
				tmp := alpha * va
				if tmp != 0 {
					f32.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
				}
			}
			tmp := alpha
			if nonUnit {
				tmp *= a[k*lda+k]
			}
			if tmp != 1 {
				for j := 0; j < n; j++ {
					btmpk[j] *= tmp
				}
			}
		}
		return
	}
	// Cases where a is on the right. Every row of b is updated independently.
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			for i := 0; i < m; i++ {
				btmp := b[i*ldb : i*ldb+n]
				// Element k feeds elements j > k, so k runs from high to low:
				// the targets have already received their own diagonal term.
				for k := n - 1; k >= 0; k-- {
					tmp := alpha * btmp[k]
					if tmp != 0 {
						btmp[k] = tmp
						if nonUnit {
							btmp[k] *= a[k*lda+k]
						}
						for ja, v := range a[k*lda+k+1 : k*lda+n] {
							j := ja + k + 1
							btmp[j] += tmp * v
						}
					}
				}
			}
			return
		}
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			// Lower triangle: element k feeds elements j < k, so k runs upwards.
			for k := 0; k < n; k++ {
				tmp := alpha * btmp[k]
				if tmp != 0 {
					btmp[k] = tmp
					if nonUnit {
						btmp[k] *= a[k*lda+k]
					}
					f32.AxpyUnitaryTo(btmp, tmp, a[k*lda:k*lda+k], btmp)
				}
			}
		}
		return
	}
	// Cases where a is transposed.
	if ul == blas.Upper {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			// Element j is a dot product with elements j.. of the row, which
			// are still unmodified while j increases.
			for j, vb := range btmp {
				tmp := vb
				if nonUnit {
					tmp *= a[j*lda+j]
				}
				tmp += f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
				btmp[j] = alpha * tmp
			}
		}
		return
	}
	for i := 0; i < m; i++ {
		btmp := b[i*ldb : i*ldb+n]
		// Lower triangle: element j depends on elements ..j, so j decreases.
		for j := n - 1; j >= 0; j-- {
			tmp := btmp[j]
			if nonUnit {
				tmp *= a[j*lda+j]
			}
			tmp += f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
			btmp[j] = alpha * tmp
		}
	}
}

// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate ./single_precision.bash

// Implementation is the receiver type on which this package defines its BLAS
// routines. It carries no state.
type Implementation struct{}

// The following are panic strings used during parameter checks.
const (
	negativeN = "blas: n < 0"
	zeroIncX  = "blas: zero x index increment"
	zeroIncY  = "blas: zero y index increment"
	badLenX   = "blas: x index out of range"
	badLenY   = "blas: y index out of range"

	mLT0  = "blas: m < 0"
	nLT0  = "blas: n < 0"
	kLT0  = "blas: k < 0"
	kLLT0 = "blas: kL < 0"
	kULT0 = "blas: kU < 0"

	badUplo      = "blas: illegal triangle"
	badTranspose = "blas: illegal transpose"
	badDiag      = "blas: illegal diagonal"
	badSide      = "blas: illegal side"

	badLdA = "blas: index of a out of range"
	badLdB = "blas: index of b out of range"
	badLdC = "blas: index of c out of range"

	badX = "blas: x index out of range"
	badY = "blas: y index out of range"
)

// [SD]gemm behavior constants. These are kept here to keep them out of the
// way during single precision code generation.
const (
	blockSize   = 64 // b x b matrix
	minParBlock = 4  // minimum number of blocks needed to go parallel
	buffMul     = 4  // how big is the buffer relative to the number of workers
)

// [SD]gemm debugging constant.
const debug = false

// subMul is a common type shared by [SD]gemm. It identifies one block of
// the result matrix.
type subMul struct {
	i, j int // index of block
}

// max returns the larger of its two arguments.
func max(a, b int) int {
	if b < a {
		return a
	}
	return b
}

// min returns the smaller of its two arguments.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}

// blocks returns the number of divisions of the dimension length with the
// given block size, counting a final partial block as a whole one.
func blocks(dim, bsize int) int {
	padded := dim + bsize - 1
	return padded / bsize
}

// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+ +package native + +import ( + "math/rand" + "testing" + + "github.com/gonum/blas" +) + +func TestDgemmParallel(t *testing.T) { + for i, test := range []struct { + m int + n int + k int + alpha float64 + tA blas.Transpose + tB blas.Transpose + }{ + { + m: 3, + n: 4, + k: 2, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: blockSize*2 + 5, + n: 3, + k: 2, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: 3, + n: blockSize * 2, + k: 2, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: 2, + n: 3, + k: blockSize*3 - 2, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: blockSize * minParBlock, + n: 3, + k: 2, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: 3, + n: blockSize * minParBlock, + k: 2, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: 2, + n: 3, + k: blockSize * minParBlock, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: blockSize*minParBlock + 1, + n: blockSize * minParBlock, + k: 3, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: 3, + n: blockSize*minParBlock + 2, + k: blockSize * 3, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: blockSize * minParBlock, + n: 3, + k: blockSize * minParBlock, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: blockSize * minParBlock, + n: blockSize * minParBlock, + k: blockSize * 3, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + { + m: blockSize + blockSize/2, + n: blockSize + blockSize/2, + k: blockSize + blockSize/2, + alpha: 2.5, + tA: blas.NoTrans, + tB: blas.NoTrans, + }, + } { + testMatchParallelSerial(t, i, blas.NoTrans, blas.NoTrans, test.m, test.n, test.k, test.alpha) + testMatchParallelSerial(t, i, blas.Trans, blas.NoTrans, test.m, test.n, test.k, test.alpha) + testMatchParallelSerial(t, i, blas.NoTrans, blas.Trans, test.m, test.n, test.k, test.alpha) + testMatchParallelSerial(t, i, blas.Trans, blas.Trans, test.m, 
test.n, test.k, test.alpha) + } +} + +func testMatchParallelSerial(t *testing.T, i int, tA, tB blas.Transpose, m, n, k int, alpha float64) { + var ( + rowA, colA int + rowB, colB int + ) + if tA == blas.NoTrans { + rowA = m + colA = k + } else { + rowA = k + colA = m + } + if tB == blas.NoTrans { + rowB = k + colB = n + } else { + rowB = n + colB = k + } + a := randmat(rowA, colA, colA) + b := randmat(rowB, colB, colB) + c := randmat(m, n, n) + + aClone := a.clone() + bClone := b.clone() + cClone := c.clone() + + lda := colA + ldb := colB + ldc := n + dgemmSerial(tA == blas.Trans, tB == blas.Trans, m, n, k, a.data, lda, b.data, ldb, cClone.data, ldc, alpha) + dgemmParallel(tA == blas.Trans, tB == blas.Trans, m, n, k, a.data, lda, b.data, ldb, c.data, ldc, alpha) + if !a.equal(aClone) { + t.Errorf("Case %v: a changed during call to dgemmParallel", i) + } + if !b.equal(bClone) { + t.Errorf("Case %v: b changed during call to dgemmParallel", i) + } + if !c.equalWithinAbs(cClone, 1e-12) { + t.Errorf("Case %v: answer not equal parallel and serial", i) + } +} + +func randmat(r, c, stride int) general64 { + data := make([]float64, r*stride+c) + for i := range data { + data[i] = rand.Float64() + } + return general64{ + data: data, + rows: r, + cols: c, + stride: stride, + } +} diff --git a/blas/native/sgemm.go b/blas/native/sgemm.go new file mode 100644 index 00000000..1eb211fb --- /dev/null +++ b/blas/native/sgemm.go @@ -0,0 +1,280 @@ +// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT. + +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "runtime" + "sync" + + "github.com/gonum/blas" + "github.com/gonum/internal/asm/f32" +) + +// Sgemm computes +// C = beta * C + alpha * A * B, +// where A, B, and C are dense matrices, and alpha and beta are scalars. +// tA and tB specify whether A or B are transposed. 
+// +// Float32 implementations are autogenerated and not directly tested. +func (Implementation) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans { + panic(badTranspose) + } + aTrans := tA == blas.Trans || tA == blas.ConjTrans + if aTrans { + checkMatrix32(k, m, a, lda) + } else { + checkMatrix32(m, k, a, lda) + } + bTrans := tB == blas.Trans || tB == blas.ConjTrans + if bTrans { + checkMatrix32(n, k, b, ldb) + } else { + checkMatrix32(k, n, b, ldb) + } + checkMatrix32(m, n, c, ldc) + + // scale c + if beta != 1 { + if beta == 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + } else { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + } + } + + sgemmParallel(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha) +} + +func sgemmParallel(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) { + // dgemmParallel computes a parallel matrix multiplication by partitioning + // a and b into sub-blocks, and updating c with the multiplication of the sub-block + // In all cases, + // A = [ A_11 A_12 ... A_1j + // A_21 A_22 ... A_2j + // ... + // A_i1 A_i2 ... A_ij] + // + // and same for B. All of the submatrix sizes are blockSize×blockSize except + // at the edges. + // + // In all cases, there is one dimension for each matrix along which + // C must be updated sequentially. 
+ // Cij = \sum_k Aik Bki, (A * B) + // Cij = \sum_k Aki Bkj, (A^T * B) + // Cij = \sum_k Aik Bjk, (A * B^T) + // Cij = \sum_k Aki Bjk, (A^T * B^T) + // + // This code computes one {i, j} block sequentially along the k dimension, + // and computes all of the {i, j} blocks concurrently. This + // partitioning allows Cij to be updated in-place without race-conditions. + // Instead of launching a goroutine for each possible concurrent computation, + // a number of worker goroutines are created and channels are used to pass + // available and completed cases. + // + // http://alexkr.com/docs/matrixmult.pdf is a good reference on matrix-matrix + // multiplies, though this code does not copy matrices to attempt to eliminate + // cache misses. + + maxKLen := k + parBlocks := blocks(m, blockSize) * blocks(n, blockSize) + if parBlocks < minParBlock { + // The matrix multiplication is small in the dimensions where it can be + // computed concurrently. Just do it in serial. + sgemmSerial(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + } + + nWorkers := runtime.GOMAXPROCS(0) + if parBlocks < nWorkers { + nWorkers = parBlocks + } + // There is a tradeoff between the workers having to wait for work + // and a large buffer making operations slow. + buf := buffMul * nWorkers + if buf > parBlocks { + buf = parBlocks + } + + sendChan := make(chan subMul, buf) + + // Launch workers. A worker receives an {i, j} submatrix of c, and computes + // A_ik B_ki (or the transposed version) storing the result in c_ij. When the + // channel is finally closed, it signals to the waitgroup that it has finished + // computing. + var wg sync.WaitGroup + for i := 0; i < nWorkers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + // Make local copies of otherwise global variables to reduce shared memory. + // This has a noticable effect on benchmarks in some cases. 
+ alpha := alpha + aTrans := aTrans + bTrans := bTrans + m := m + n := n + for sub := range sendChan { + i := sub.i + j := sub.j + leni := blockSize + if i+leni > m { + leni = m - i + } + lenj := blockSize + if j+lenj > n { + lenj = n - j + } + + cSub := sliceView32(c, ldc, i, j, leni, lenj) + + // Compute A_ik B_kj for all k + for k := 0; k < maxKLen; k += blockSize { + lenk := blockSize + if k+lenk > maxKLen { + lenk = maxKLen - k + } + var aSub, bSub []float32 + if aTrans { + aSub = sliceView32(a, lda, k, i, lenk, leni) + } else { + aSub = sliceView32(a, lda, i, k, leni, lenk) + } + if bTrans { + bSub = sliceView32(b, ldb, j, k, lenj, lenk) + } else { + bSub = sliceView32(b, ldb, k, j, lenk, lenj) + } + sgemmSerial(aTrans, bTrans, leni, lenj, lenk, aSub, lda, bSub, ldb, cSub, ldc, alpha) + } + } + }() + } + + // Send out all of the {i, j} subblocks for computation. + for i := 0; i < m; i += blockSize { + for j := 0; j < n; j += blockSize { + sendChan <- subMul{ + i: i, + j: j, + } + } + } + close(sendChan) + wg.Wait() +} + +// sgemmSerial is serial matrix multiply +func sgemmSerial(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) { + switch { + case !aTrans && !bTrans: + sgemmSerialNotNot(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + case aTrans && !bTrans: + sgemmSerialTransNot(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + case !aTrans && bTrans: + sgemmSerialNotTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + case aTrans && bTrans: + sgemmSerialTransTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha) + return + default: + panic("unreachable") + } +} + +// sgemmSerial where neither a nor b are transposed +func sgemmSerialNotNot(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) { + // This style is used instead of the literal [i*stride +j]) is used because + // approximately 5 times faster as of go 1.3. 
+ for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for l, v := range a[i*lda : i*lda+k] { + tmp := alpha * v + if tmp != 0 { + f32.AxpyUnitaryTo(ctmp, tmp, b[l*ldb:l*ldb+n], ctmp) + } + } + } +} + +// sgemmSerial where neither a is transposed and b is not +func sgemmSerialTransNot(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) { + // This style is used instead of the literal [i*stride +j]) is used because + // approximately 5 times faster as of go 1.3. + for l := 0; l < k; l++ { + btmp := b[l*ldb : l*ldb+n] + for i, v := range a[l*lda : l*lda+m] { + tmp := alpha * v + if tmp != 0 { + ctmp := c[i*ldc : i*ldc+n] + f32.AxpyUnitaryTo(ctmp, tmp, btmp, ctmp) + } + } + } +} + +// sgemmSerial where neither a is not transposed and b is +func sgemmSerialNotTrans(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) { + // This style is used instead of the literal [i*stride +j]) is used because + // approximately 5 times faster as of go 1.3. + for i := 0; i < m; i++ { + atmp := a[i*lda : i*lda+k] + ctmp := c[i*ldc : i*ldc+n] + for j := 0; j < n; j++ { + ctmp[j] += alpha * f32.DotUnitary(atmp, b[j*ldb:j*ldb+k]) + } + } +} + +// sgemmSerial where both are transposed +func sgemmSerialTransTrans(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) { + // This style is used instead of the literal [i*stride +j]) is used because + // approximately 5 times faster as of go 1.3. 
#!/usr/bin/env bash

# Copyright ©2015 The gonum Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Generates the float32 sources of this package from their float64
# counterparts. Every section below follows the same recipe:
#   1. truncate the target file and write the "Code generated" marker,
#   2. pipe the float64 source through a chain of `gofmt -r` rewrites that
#      rename types, helpers and asm kernels,
#   3. finish with `sed` for the edits gofmt cannot express (doc comments,
#      import paths, the leading letter of exported method names),
#   4. append the result to the target file.

# Stop at the first failing command, on use of an unset variable, and when any
# stage of a pipeline fails. Without this a broken gofmt/sed stage would leave
# a truncated generated file behind while the script still exited 0.
set -euo pipefail

# WARNING is spliced by sed in front of every generated exported method. The
# trailing backslashes make sed emit the embedded newlines literally.
WARNING='//\
// Float32 implementations are autogenerated and not directly tested.\
'

# Level1 routines.

echo Generating level1single.go
echo -e '// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.\n' > level1single.go
cat level1double.go \
| gofmt -r 'blas.Float64Level1 -> blas.Float32Level1' \
\
| gofmt -r 'float64 -> float32' \
| gofmt -r 'blas.DrotmParams -> blas.SrotmParams' \
\
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyIncTo -> f32.AxpyIncTo' \
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
| gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
      -e 's_^// D_// S_' \
      -e "s_^\(func (Implementation) \)Id\(.*\)\$_$WARNING\1Is\2_" \
      -e 's_^// Id_// Is_' \
      -e 's_"github.com/gonum/internal/asm/f64"_"github.com/gonum/internal/asm/f32"_' \
      -e 's_"math"_math "github.com/gonum/blas/native/internal/math32"_' \
>> level1single.go

# Sdot: float32 inputs, float32 result.
echo Generating level1single_sdot.go
echo -e '// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.\n' > level1single_sdot.go
cat level1double_ddot.go \
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.DotInc -> f32.DotInc' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
      -e 's_^// D_// S_' \
      -e 's_"github.com/gonum/internal/asm/f64"_"github.com/gonum/internal/asm/f32"_' \
>> level1single_sdot.go

# Dsdot: only the slice types are narrowed, so the scalar result stays float64.
echo Generating level1single_dsdot.go
echo -e '// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.\n' > level1single_dsdot.go
cat level1double_ddot.go \
| gofmt -r '[]float64 -> []float32' \
\
| gofmt -r 'f64.DotInc -> f32.DdotInc' \
| gofmt -r 'f64.DotUnitary -> f32.DdotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1Ds\2_" \
      -e 's_^// D_// Ds_' \
      -e 's_"github.com/gonum/internal/asm/f64"_"github.com/gonum/internal/asm/f32"_' \
>> level1single_dsdot.go

# Sdsdot: like Sdot, but the signature gains an alpha parameter that is added
# to the dot product computed through the Ddot kernels.
echo Generating level1single_sdsdot.go
echo -e '// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.\n' > level1single_sdsdot.go
cat level1double_ddot.go \
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.DotInc(x, y, f(n), f(incX), f(incY), f(ix), f(iy)) -> alpha + float32(f32.DdotInc(x, y, f(n), f(incX), f(incY), f(ix), f(iy)))' \
| gofmt -r 'f64.DotUnitary(a, b) -> alpha + float32(f32.DdotUnitary(a, b))' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1Sds\2_" \
      -e 's_^// D\(.*\)$_// Sds\1 plus a constant_' \
      -e 's_\\sum_alpha + \\sum_' \
      -e 's/n int/n int, alpha float32/' \
      -e 's_"github.com/gonum/internal/asm/f64"_"github.com/gonum/internal/asm/f32"_' \
>> level1single_sdsdot.go


# Level2 routines.

echo Generating level2single.go
echo -e '// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.\n' > level2single.go
cat level2double.go \
| gofmt -r 'blas.Float64Level2 -> blas.Float32Level2' \
\
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'Dscal -> Sscal' \
\
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyIncTo -> f32.AxpyIncTo' \
| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.DotInc -> f32.DotInc' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
      -e 's_^// D_// S_' \
      -e 's_"github.com/gonum/internal/asm/f64"_"github.com/gonum/internal/asm/f32"_' \
>> level2single.go


# Level3 routines.

echo Generating level3single.go
echo -e '// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.\n' > level3single.go
cat level3double.go \
| gofmt -r 'blas.Float64Level3 -> blas.Float32Level3' \
\
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
      -e 's_^// D_// S_' \
      -e 's_"github.com/gonum/internal/asm/f64"_"github.com/gonum/internal/asm/f32"_' \
>> level3single.go

echo Generating general_single.go
echo -e '// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.\n' > general_single.go
cat general_double.go \
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'general64 -> general32' \
| gofmt -r 'newGeneral64 -> newGeneral32' \
\
| sed -e 's/(g general64) print()/(g general32) print()/' \
      -e 's_"math"_math "github.com/gonum/blas/native/internal/math32"_' \
      -e 's_"github.com/gonum/internal/asm/f64"_"github.com/gonum/internal/asm/f32"_' \
>> general_single.go

echo Generating sgemm.go
echo -e '// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.\n' > sgemm.go
cat dgemm.go \
| gofmt -r 'float64 -> float32' \
| gofmt -r 'general64 -> general32' \
| gofmt -r 'sliceView64 -> sliceView32' \
| gofmt -r 'checkMatrix64 -> checkMatrix32' \
\
| gofmt -r 'dgemmParallel -> sgemmParallel' \
| gofmt -r 'computeNumBlocks64 -> computeNumBlocks32' \
| gofmt -r 'dgemmSerial -> sgemmSerial' \
| gofmt -r 'dgemmSerialNotNot -> sgemmSerialNotNot' \
| gofmt -r 'dgemmSerialTransNot -> sgemmSerialTransNot' \
| gofmt -r 'dgemmSerialNotTrans -> sgemmSerialNotTrans' \
| gofmt -r 'dgemmSerialTransTrans -> sgemmSerialTransTrans' \
\
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyIncTo -> f32.AxpyIncTo' \
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
      -e 's_^// D_// S_' \
      -e 's_^// d_// s_' \
      -e 's_"github.com/gonum/internal/asm/f64"_"github.com/gonum/internal/asm/f32"_' \
>> sgemm.go
// copyrightnotice is the license header written into every generated file.
var copyrightnotice = []byte(`// Copyright 2014 The Gonum Authors. All rights reserved.
// Use of this code is governed by a BSD-style
// license that can be found in the LICENSE file`)

// autogen is the "generated code" marker written as the first line of every
// generated file.
var autogen = []byte("// Code generated by \"go run github.com/gonum/blas/testblas/benchautogen/autogen_bench_level1double.go\"; DO NOT EDIT.\n")

// imports is the import block of the generated benchmark file.
var imports = []byte(`import(
	"math/rand"
	"testing"

	"github.com/gonum/blas"
)`)

// randomSliceFunction is the source of the helper the generated benchmarks use
// to build their input vectors.
var randomSliceFunction = []byte(`func randomSlice(l, idx int) ([]float64) {
	if idx < 0{
		idx = -idx
	}
	s := make([]float64, l * idx)
	for i := range s {
		s[i] = rand.Float64()
	}
	return s
}`)

// Increments copied verbatim into the generated file's const block.
const (
	posInc1 = 5
	posInc2 = 3
	negInc1 = -3
	negInc2 = -4
)

// level1Sizes lists the vector lengths every routine is benchmarked at. upper
// is the constant name emitted into the generated file, camel the fragment
// used in benchmark function names.
var level1Sizes = []struct {
	lower string
	upper string
	camel string
	size  int
}{
	{
		lower: "small",
		upper: "SMALL_SLICE",
		camel: "Small",
		size:  10,
	},
	{
		lower: "medium",
		upper: "MEDIUM_SLICE",
		camel: "Medium",
		size:  1000,
	},
	{
		lower: "large",
		upper: "LARGE_SLICE",
		camel: "Large",
		size:  100000,
	},
	{
		lower: "huge",
		upper: "HUGE_SLICE",
		camel: "Huge",
		size:  10000000,
	},
}

// level1functionStruct describes one routine to benchmark.
type level1functionStruct struct {
	camel      string // routine name as called on impl
	sig        string // parameter list of the generated driver function
	call       string // argument list forwarded to the routine
	extraSetup string // statements emitted before the driver is invoked
	oneInput   bool   // true when the routine takes x only (no y vector)
	extraName  string // if have a couple different cases for the same function
}

// NOTE(review): all three Drotm cases below pass Flag: blas.OffDiagonal even
// though they are named OffDia, Dia and Resc. That looks like a copy-paste
// slip, but the intended flags cannot be confirmed from this file, so the
// emitted text is left untouched.
var level1Functions = []level1functionStruct{
	{
		camel:    "Ddot",
		sig:      "n int, x []float64, incX int, y []float64, incY int",
		call:     "n, x, incX, y, incY",
		oneInput: false,
	},
	{
		camel:    "Dnrm2",
		sig:      "n int, x []float64, incX int",
		call:     "n, x, incX",
		oneInput: true,
	},
	{
		camel:    "Dasum",
		sig:      "n int, x []float64, incX int",
		call:     "n, x, incX",
		oneInput: true,
	},
	{
		camel:    "Idamax",
		sig:      "n int, x []float64, incX int",
		call:     "n, x, incX",
		oneInput: true,
	},
	{
		camel:    "Dswap",
		sig:      "n int, x []float64, incX int, y []float64, incY int",
		call:     "n, x, incX, y, incY",
		oneInput: false,
	},
	{
		camel:    "Dcopy",
		sig:      "n int, x []float64, incX int, y []float64, incY int",
		call:     "n, x, incX, y, incY",
		oneInput: false,
	},
	{
		camel:      "Daxpy",
		sig:        "n int, alpha float64, x []float64, incX int, y []float64, incY int",
		call:       "n, alpha, x, incX, y, incY",
		extraSetup: "alpha := 2.4",
		oneInput:   false,
	},
	{
		camel:      "Drot",
		sig:        "n int, x []float64, incX int, y []float64, incY int, c, s float64",
		call:       "n, x, incX, y, incY, c, s",
		extraSetup: "c := 0.89725836967\ns:= 0.44150585279",
		oneInput:   false,
	},
	{
		camel:      "Drotm",
		sig:        "n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams",
		call:       "n, x, incX, y, incY, p",
		extraSetup: "p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{0, -0.625, 0.9375,0}}",
		oneInput:   false,
		extraName:  "OffDia",
	},
	{
		camel:      "Drotm",
		sig:        "n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams",
		call:       "n, x, incX, y, incY, p",
		extraSetup: "p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{5.0 / 12, 0, 0, 0.625}}",
		oneInput:   false,
		extraName:  "Dia",
	},
	{
		camel:      "Drotm",
		sig:        "n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams",
		call:       "n, x, incX, y, incY, p",
		extraSetup: "p := blas.DrotmParams{Flag: blas.OffDiagonal, H: [4]float64{4096, -3584, 1792, 4096}}",
		oneInput:   false,
		extraName:  "Resc",
	},
	{
		camel:      "Dscal",
		sig:        "n int, alpha float64, x []float64, incX int",
		call:       "n, alpha, x, incX",
		extraSetup: "alpha := 2.4",
		oneInput:   true,
	},
}

// main regenerates the level 1 benchmark file of the native and cgo packages
// under $GOPATH/src/github.com/gonum/blas and runs "go fmt" on each package.
// Any failure is printed and the program exits with status 1.
func main() {
	// Read GOPATH here rather than in an init function so that merely loading
	// the package (for instance from a test) cannot panic.
	gopath := os.Getenv("GOPATH")
	if gopath == "" {
		fmt.Println("gopath not set")
		os.Exit(1)
	}
	blasPath := filepath.Join(gopath, "src", "github.com", "gonum", "blas")

	for _, pkg := range []string{"native", "cgo"} {
		if err := level1(filepath.Join(blasPath, pkg), pkg); err != nil {
			fmt.Println(err)
			os.Exit(1)
		}
		// The emitted source is deliberately unformatted; let go fmt tidy it.
		if err := exec.Command("go", "fmt", path.Join("github.com", "gonum", "blas", pkg)).Run(); err != nil {
			fmt.Println(err)
			os.Exit(1)
		}
	}
}

// printHeader writes the generated-code marker, the license header, the
// package clause for name and the import block to f. It returns the error of
// the underlying write, if any.
func printHeader(f *os.File, name string) error {
	var buf []byte
	buf = append(buf, autogen...)
	buf = append(buf, "\n\n"...)
	buf = append(buf, copyrightnotice...)
	buf = append(buf, "\n\n"...)
	buf = append(buf, "package "+name...)
	buf = append(buf, "\n\n"...)
	buf = append(buf, imports...)
	buf = append(buf, "\n\n"...)
	_, err := f.Write(buf)
	return err
}

// level1 generates level1doubleBench_auto_test.go inside benchPath, declaring
// it as part of package pkgname. It returns the first error met while
// creating, writing or closing the file, leaving the decision to abort to the
// caller.
func level1(benchPath string, pkgname string) (err error) {
	f, err := os.Create(filepath.Join(benchPath, "level1doubleBench_auto_test.go"))
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data; report it unless an earlier
		// error is already being returned.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	if err = printHeader(f, pkgname); err != nil {
		return err
	}

	// Constants shared by all benchmarks, followed by the randomSlice helper.
	buf := []byte("const (\n")
	buf = append(buf, "\tposInc1 = "+strconv.Itoa(posInc1)+"\n"...)
	buf = append(buf, "\tposInc2 = "+strconv.Itoa(posInc2)+"\n"...)
	buf = append(buf, "\tnegInc1 = "+strconv.Itoa(negInc1)+"\n"...)
	buf = append(buf, "\tnegInc2 = "+strconv.Itoa(negInc2)+"\n"...)
	for _, con := range level1Sizes {
		buf = append(buf, "\t"+con.upper+" = "+strconv.Itoa(con.size)+"\n"...)
	}
	buf = append(buf, ")\n\n"...)
	buf = append(buf, randomSliceFunction...)
	buf = append(buf, "\n\n"...)
	if _, err = f.Write(buf); err != nil {
		return err
	}

	for _, fun := range level1Functions {
		if err = writeLevel1Benchmark(fun, f); err != nil {
			return err
		}
		if _, err = f.WriteString("\n/* ------------------ */ \n"); err != nil {
			return err
		}
	}
	return nil
}

// writeLevel1Benchmark writes to f the unexported driver benchmark<Name> for
// fun, followed by one exported Benchmark function per size in level1Sizes and
// per increment combination (two combinations for single-vector routines, four
// otherwise). It returns the error of the underlying write, if any.
func writeLevel1Benchmark(fun level1functionStruct, f *os.File) error {
	driver := "benchmark" + fun.camel + fun.extraName

	// The driver resets the timer and calls the routine b.N times.
	var buf []byte
	buf = append(buf, "func "+driver+"(b *testing.B, "+fun.sig+") {\n"...)
	buf = append(buf, "b.ResetTimer()\n"...)
	buf = append(buf, "for i := 0; i < b.N; i++{\n"...)
	buf = append(buf, "\timpl."+fun.camel+"("+fun.call+")\n}\n}\n"...)
	buf = append(buf, "\n"...)

	type variant struct{ incX, incY, name string }
	variants := []variant{
		{"1", "1", "BothUnitary"},
		{"posInc1", "1", "IncUni"},
		{"1", "negInc1", "UniInc"},
		{"posInc1", "negInc1", "BothInc"},
	}
	if fun.oneInput {
		variants = []variant{
			{"1", "", "UnitaryInc"},
			{"posInc1", "", "PosInc"},
		}
	}

	for _, sz := range level1Sizes {
		for _, v := range variants {
			buf = append(buf, "func Benchmark"+fun.camel+fun.extraName+sz.camel+v.name+"(b *testing.B){\n"...)
			buf = append(buf, "n := "+sz.upper+"\n"...)
			buf = append(buf, "incX := "+v.incX+"\n"...)
			buf = append(buf, "x := randomSlice(n, incX)\n"...)
			if !fun.oneInput {
				buf = append(buf, "incY := "+v.incY+"\n"...)
				buf = append(buf, "y := randomSlice(n, incY)\n"...)
			}
			buf = append(buf, fun.extraSetup+"\n"...)
			buf = append(buf, driver+"(b, "+fun.call+")\n"...)
			buf = append(buf, "}\n\n"...)
		}
	}

	_, err := f.Write(buf)
	return err
}
// throwPanic selects how unexpected panics are handled: true lets them
// propagate, false reports them as test errors.
const throwPanic = true

// dTolEqual reports whether a and b are equal within a tolerance of 1e-14.
// Two NaNs compare equal. When the larger magnitude exceeds 1 both values are
// divided by it first, making the comparison relative for large inputs.
func dTolEqual(a, b float64) bool {
	switch {
	case math.IsNaN(a) && math.IsNaN(b):
		return true
	case a == b:
		return true
	}
	if scale := math.Max(math.Abs(a), math.Abs(b)); scale > 1 {
		a /= scale
		b /= scale
	}
	return math.Abs(a-b) < 1e-14
}

// dSliceTolEqual reports whether a and b have the same length and are
// element-wise equal according to dTolEqual.
func dSliceTolEqual(a, b []float64) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if !dTolEqual(v, b[i]) {
			return false
		}
	}
	return true
}

// dStridedSliceTolEqual compares n elements of a, taken with stride inca,
// against n elements of b, taken with stride incb, using dTolEqual. For a
// non-positive stride the walk starts at index -(n-1)*inc.
func dStridedSliceTolEqual(n int, a []float64, inca int, b []float64, incb int) bool {
	var posA, posB int
	if inca <= 0 {
		posA = -(n - 1) * inca
	}
	if incb <= 0 {
		posB = -(n - 1) * incb
	}
	for remaining := n; remaining > 0; remaining-- {
		if !dTolEqual(a[posA], b[posB]) {
			return false
		}
		posA += inca
		posB += incb
	}
	return true
}

// dSliceEqual reports whether a and b have the same length and identical
// elements under ==, so a NaN never matches anything.
func dSliceEqual(a, b []float64) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}

// dCopyTwoTmp copies x into xTmp and y into yTmp. It panics when the lengths
// of a pair differ.
func dCopyTwoTmp(x, xTmp, y, yTmp []float64) {
	if len(x) != len(xTmp) {
		panic("x size mismatch")
	}
	if len(y) != len(yTmp) {
		panic("y size mismatch")
	}
	copy(xTmp, x)
	copy(yTmp, y)
}

// panics reports whether calling f panics.
func panics(f func()) (b bool) {
	defer func() {
		if recover() != nil {
			b = true
		}
	}()
	f()
	return
}

// testpanics records an error on t, labelled with name, when f returns
// without panicking.
func testpanics(f func(), name string, t *testing.T) {
	if panics(f) {
		return
	}
	t.Errorf("%v should panic and does not", name)
}
// sliceOfSliceCopy returns a deep copy of a: a new outer slice whose rows are
// freshly allocated copies of the rows of a.
func sliceOfSliceCopy(a [][]float64) [][]float64 {
	dup := make([][]float64, len(a))
	for i, row := range a {
		dup[i] = make([]float64, len(row))
		copy(dup[i], row)
	}
	return dup
}

// sliceCopy returns a newly allocated copy of a.
func sliceCopy(a []float64) []float64 {
	return append(make([]float64, 0, len(a)), a...)
}

// flatten stores the dense matrix a row by row in a single slice, using
// len(a[0]) as the row length. It returns nil when a has no rows.
func flatten(a [][]float64) []float64 {
	if len(a) == 0 {
		return nil
	}
	cols := len(a[0])
	flat := make([]float64, len(a)*cols)
	for i, row := range a {
		base := i * cols
		for j := 0; j < cols; j++ {
			flat[base+j] = row[j]
		}
	}
	return flat
}

// unflatten is the inverse of flatten: it splits the row-major slice a into m
// rows of n elements each.
func unflatten(a []float64, m, n int) [][]float64 {
	rows := make([][]float64, m)
	for i := range rows {
		row := make([]float64, n)
		for j := range row {
			row[j] = a[i*n+j]
		}
		rows[i] = row
	}
	return rows
}

// flattenBanded turns a dense banded slice of slice with ku super-diagonals
// and kl sub-diagonals into the compact banded format: one output row of
// ku+kl+1 entries per input row, with the diagonal element in column kl.
// Entries that have no counterpart inside the matrix are NaN. It panics if ku
// or kl is negative.
func flattenBanded(a [][]float64, ku, kl int) []float64 {
	rows := len(a)
	cols := len(a[0])
	if ku < 0 || kl < 0 {
		panic("testblas: negative band length")
	}
	width := ku + kl + 1
	packed := make([]float64, rows*width)
	for i := range packed {
		packed[i] = math.NaN()
	}
	for i, row := range a {
		// Clip the band [i-kl, i+ku] of this row to the matrix columns.
		first := i - kl
		if first < 0 {
			first = 0
		}
		last := i + ku
		if last >= cols {
			last = cols - 1
		}
		for c := first; c <= last; c++ {
			// Dense column c lands kl+(c-i) places into the packed row.
			packed[i*width+kl+c-i] = row[c]
		}
	}
	return packed
}
// makeIncremented takes a slice with inc == 1 and returns a new slice holding
// the same values spaced |inc| apart, followed by extra trailing elements. For
// a negative inc the values are stored in reverse order. The gaps and the tail
// are filled with distinct values counting up from 100. It panics if inc is
// zero.
func makeIncremented(x []float64, inc int, extra int) []float64 {
	if inc == 0 {
		panic("zero inc")
	}
	stride := inc
	if stride < 0 {
		stride = -stride
	}

	// Work on a copy so that x itself is never reordered.
	ordered := make([]float64, len(x))
	copy(ordered, x)
	if inc < 0 {
		for lo, hi := 0, len(ordered)-1; lo < hi; lo, hi = lo+1, hi-1 {
			ordered[lo], ordered[hi] = ordered[hi], ordered[lo]
		}
	}

	// don't use NaN because it makes comparison hard
	// Do use a weird unique value for easier debugging
	filler := 100.0
	var out []float64
	last := len(x) - 1
	for i, v := range ordered {
		out = append(out, v)
		if i == last {
			// No gap after the final value; only the extra tail follows.
			continue
		}
		for gap := 1; gap < stride; gap++ {
			out = append(out, filler)
			filler++
		}
	}
	for tail := 0; tail < extra; tail++ {
		out = append(out, filler)
		filler++
	}
	return out
}
6}, + {1, 5, 8}, + {0, 6, 2}, + {0, 0, 7}, + {0, 0, 0}, + }, + ku: 2, + kl: 1, + condensed: [][]float64{ + {math.NaN(), 3, 4, 6}, + {1, 5, 8, math.NaN()}, + {6, 2, math.NaN(), math.NaN()}, + {7, math.NaN(), math.NaN(), math.NaN()}, + {math.NaN(), math.NaN(), math.NaN(), math.NaN()}, + }, + }, + { + dense: [][]float64{ + {1, 2, 0}, + {3, 4, 5}, + {6, 7, 8}, + {0, 9, 10}, + {0, 0, 11}, + }, + ku: 1, + kl: 2, + condensed: [][]float64{ + {math.NaN(), math.NaN(), 1, 2}, + {math.NaN(), 3, 4, 5}, + {6, 7, 8, math.NaN()}, + {9, 10, math.NaN(), math.NaN()}, + {11, math.NaN(), math.NaN(), math.NaN()}, + }, + }, + { + dense: [][]float64{ + {1, 0, 0}, + {3, 4, 0}, + {6, 7, 8}, + {0, 9, 10}, + {0, 0, 11}, + }, + ku: 0, + kl: 2, + condensed: [][]float64{ + {math.NaN(), math.NaN(), 1}, + {math.NaN(), 3, 4}, + {6, 7, 8}, + {9, 10, math.NaN()}, + {11, math.NaN(), math.NaN()}, + }, + }, + { + dense: [][]float64{ + {1, 0, 0, 0, 0}, + {3, 4, 0, 0, 0}, + {1, 3, 5, 0, 0}, + }, + ku: 0, + kl: 2, + condensed: [][]float64{ + {math.NaN(), math.NaN(), 1}, + {math.NaN(), 3, 4}, + {1, 3, 5}, + }, + }, + } { + condensed := flattenBanded(test.dense, test.ku, test.kl) + correct := flatten(test.condensed) + if !floats.Same(condensed, correct) { + t.Errorf("Case %v mismatch. Want %v, got %v.", i, correct, condensed) + } + } +} + +func TestFlattenTriangular(t *testing.T) { + for i, test := range []struct { + a [][]float64 + ans []float64 + ul blas.Uplo + }{ + { + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + ul: blas.Upper, + ans: []float64{1, 2, 3, 4, 5, 6}, + }, + { + a: [][]float64{ + {1, 0, 0}, + {2, 3, 0}, + {4, 5, 6}, + }, + ul: blas.Lower, + ans: []float64{1, 2, 3, 4, 5, 6}, + }, + } { + a := flattenTriangular(test.a, test.ul) + if !floats.Equal(a, test.ans) { + t.Errorf("Case %v. 
Want %v, got %v.", i, test.ans, a) + } + } +} diff --git a/blas/testblas/dgbmv.go b/blas/testblas/dgbmv.go new file mode 100644 index 00000000..8729b5b4 --- /dev/null +++ b/blas/testblas/dgbmv.go @@ -0,0 +1,94 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" +) + +type Dgbmver interface { + Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) +} + +func DgbmvTest(t *testing.T, blasser Dgbmver) { + for i, test := range []struct { + tA blas.Transpose + m, n int + kL, kU int + alpha float64 + a [][]float64 + lda int + x []float64 + beta float64 + y []float64 + ans []float64 + }{ + { + tA: blas.NoTrans, + m: 9, + n: 6, + lda: 4, + kL: 2, + kU: 1, + alpha: 3.0, + beta: 2.0, + a: [][]float64{ + {5, 3, 0, 0, 0, 0}, + {-1, 2, 9, 0, 0, 0}, + {4, 8, 3, 6, 0, 0}, + {0, -1, 8, 2, 1, 0}, + {0, 0, 9, 9, 9, 5}, + {0, 0, 0, 2, -3, 2}, + {0, 0, 0, 0, 1, 5}, + {0, 0, 0, 0, 0, 6}, + {0, 0, 0, 0, 0, 0}, + }, + x: []float64{1, 2, 3, 4, 5, 6}, + y: []float64{-1, -2, -3, -4, -5, -6, -7, -8, -9}, + ans: []float64{31, 86, 153, 97, 404, 3, 91, 92, -18}, + }, + { + tA: blas.Trans, + m: 9, + n: 6, + lda: 4, + kL: 2, + kU: 1, + alpha: 3.0, + beta: 2.0, + a: [][]float64{ + {5, 3, 0, 0, 0, 0}, + {-1, 2, 9, 0, 0, 0}, + {4, 8, 3, 6, 0, 0}, + {0, -1, 8, 2, 1, 0}, + {0, 0, 9, 9, 9, 5}, + {0, 0, 0, 2, -3, 2}, + {0, 0, 0, 0, 1, 5}, + {0, 0, 0, 0, 0, 6}, + {0, 0, 0, 0, 0, 0}, + }, + x: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}, + y: []float64{-1, -2, -3, -4, -5, -6}, + ans: []float64{43, 77, 306, 241, 104, 348}, + }, + } { + extra := 3 + aFlat := flattenBanded(test.a, test.kU, test.kL) + incTest := func(incX, incY, extra int) { + xnew := makeIncremented(test.x, incX, extra) + ynew := makeIncremented(test.y, incY, extra) + ans := makeIncremented(test.ans, incY, extra) + blasser.Dgbmv(test.tA, test.m, test.n, test.kL, test.kU, test.alpha, aFlat, test.lda, xnew, incX, test.beta, ynew, incY) + if 
!dSliceTolEqual(ans, ynew) { + t.Errorf("Case %v: Want %v, got %v", i, ans, ynew) + } + } + incTest(1, 1, extra) + incTest(1, 3, extra) + incTest(1, -3, extra) + incTest(2, 3, extra) + incTest(2, -3, extra) + incTest(3, 2, extra) + incTest(-3, 2, extra) + } +} diff --git a/blas/testblas/dgemm.go b/blas/testblas/dgemm.go new file mode 100644 index 00000000..4519d7ef --- /dev/null +++ b/blas/testblas/dgemm.go @@ -0,0 +1,252 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" +) + +type Dgemmer interface { + Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) +} + +type DgemmCase struct { + isATrans bool + m, n, k int + alpha, beta float64 + a [][]float64 + aTrans [][]float64 // transpose of a + b [][]float64 + c [][]float64 + ans [][]float64 +} + +var DgemmCases = []DgemmCase{ + + { + m: 4, + n: 3, + k: 2, + isATrans: false, + alpha: 2, + beta: 0.5, + a: [][]float64{ + {1, 2}, + {4, 5}, + {7, 8}, + {10, 11}, + }, + b: [][]float64{ + {1, 5, 6}, + {5, -8, 8}, + }, + c: [][]float64{ + {4, 8, -9}, + {12, 16, -8}, + {1, 5, 15}, + {-3, -4, 7}, + }, + ans: [][]float64{ + {24, -18, 39.5}, + {64, -32, 124}, + {94.5, -55.5, 219.5}, + {128.5, -78, 299.5}, + }, + }, + { + m: 4, + n: 2, + k: 3, + isATrans: false, + alpha: 2, + beta: 0.5, + a: [][]float64{ + {1, 2, 3}, + {4, 5, 6}, + {7, 8, 9}, + {10, 11, 12}, + }, + b: [][]float64{ + {1, 5}, + {5, -8}, + {6, 2}, + }, + c: [][]float64{ + {4, 8}, + {12, 16}, + {1, 5}, + {-3, -4}, + }, + ans: [][]float64{ + {60, -6}, + {136, -8}, + {202.5, -19.5}, + {272.5, -30}, + }, + }, + { + m: 3, + n: 2, + k: 4, + isATrans: false, + alpha: 2, + beta: 0.5, + a: [][]float64{ + {1, 2, 3, 4}, + {4, 5, 6, 7}, + {8, 9, 10, 11}, + }, + b: [][]float64{ + {1, 5}, + {5, -8}, + {6, 2}, + {8, 10}, + }, + c: [][]float64{ + {4, 8}, + {12, 16}, + {9, -10}, + }, + ans: [][]float64{ + {124, 74}, + {248, 132}, + {406.5, 191}, + }, + }, + { + m: 3, + n: 4, + 
// transpose returns a newly allocated transpose of a, so that the result at
// [i][j] equals a[j][i]. The result has len(a[0]) rows of len(a) elements
// each; a is assumed to be rectangular with every row at least as long as
// a[0]. An empty a yields nil instead of panicking on a[0].
func transpose(a [][]float64) [][]float64 {
	if len(a) == 0 {
		return nil
	}
	rows, cols := len(a), len(a[0])
	t := make([][]float64, cols)
	for c := range t {
		t[c] = make([]float64, rows)
		for r := range t[c] {
			t[c][r] = a[r][c]
		}
	}
	return t
}
blas.Trans, + test.m, test.n, test.k, test.alpha, test.beta, transpose(test.a), transpose(test.b), test.c, test.ans) + } +} + +func dgemmcomp(i int, name string, t *testing.T, blasser Dgemmer, tA, tB blas.Transpose, m, n, k int, + alpha, beta float64, a [][]float64, b [][]float64, c [][]float64, ans [][]float64) { + + aFlat := flatten(a) + aCopy := flatten(a) + bFlat := flatten(b) + bCopy := flatten(b) + cFlat := flatten(c) + ansFlat := flatten(ans) + lda := len(a[0]) + ldb := len(b[0]) + ldc := len(c[0]) + + // Compute the matrix multiplication + blasser.Dgemm(tA, tB, m, n, k, alpha, aFlat, lda, bFlat, ldb, beta, cFlat, ldc) + + if !dSliceEqual(aFlat, aCopy) { + t.Errorf("Test %v case %v: a changed during call to Dgemm", i, name) + } + if !dSliceEqual(bFlat, bCopy) { + t.Errorf("Test %v case %v: b changed during call to Dgemm", i, name) + } + + if !dSliceTolEqual(ansFlat, cFlat) { + t.Errorf("Test %v case %v: answer mismatch. Expected %v, Found %v", i, name, ansFlat, cFlat) + } + // TODO: Need to add a sub-slice test where don't use up full matrix +} diff --git a/blas/testblas/dgemmbench.go b/blas/testblas/dgemmbench.go new file mode 100644 index 00000000..d54b747d --- /dev/null +++ b/blas/testblas/dgemmbench.go @@ -0,0 +1,39 @@ +package testblas + +import ( + "math/rand" + "testing" + + "github.com/gonum/blas" +) + +func DgemmBenchmark(b *testing.B, dgemm Dgemmer, m, n, k int, tA, tB blas.Transpose) { + a := make([]float64, m*k) + for i := range a { + a[i] = rand.Float64() + } + bv := make([]float64, k*n) + for i := range bv { + bv[i] = rand.Float64() + } + c := make([]float64, m*n) + for i := range c { + c[i] = rand.Float64() + } + var lda, ldb int + if tA == blas.Trans { + lda = m + } else { + lda = k + } + if tB == blas.Trans { + ldb = k + } else { + ldb = n + } + ldc := n + b.ResetTimer() + for i := 0; i < b.N; i++ { + dgemm.Dgemm(tA, tB, m, n, k, 3.0, a, lda, bv, ldb, 1.0, c, ldc) + } +} diff --git a/blas/testblas/dgemv.go b/blas/testblas/dgemv.go new file 
mode 100644 index 00000000..e756c201 --- /dev/null +++ b/blas/testblas/dgemv.go @@ -0,0 +1,680 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" +) + +type DgemvCase struct { + Name string + m int + n int + A [][]float64 + tA blas.Transpose + x []float64 + incX int + y []float64 + incY int + xCopy []float64 + yCopy []float64 + + Subcases []DgemvSubcase +} + +type DgemvSubcase struct { + mulXNeg1 bool + mulYNeg1 bool + alpha float64 + beta float64 + ans []float64 +} + +var DgemvCases = []DgemvCase{ + { + Name: "M_gt_N_Inc1_NoTrans", + tA: blas.NoTrans, + m: 5, + n: 3, + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + {1, 1, 2}, + {9, 2, 5}, + }, + incX: 1, + incY: 1, + x: []float64{1, 2, 3}, + y: []float64{7, 8, 9, 10, 11}, + + Subcases: []DgemvSubcase{ + { + alpha: 0, + beta: 0, + ans: []float64{0, 0, 0, 0, 0}, + }, + { + alpha: 0, + beta: 1, + ans: []float64{7, 8, 9, 10, 11}, + }, + { + alpha: 1, + beta: 0, + ans: []float64{40.8, 43.9, 33, 9, 28}, + }, + { + alpha: 8, + beta: -6, + ans: []float64{284.4, 303.2, 210, 12, 158}, + }, + }, + }, + { + Name: "M_gt_N_Inc1_Trans", + tA: blas.Trans, + m: 5, + n: 3, + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + {1, 1, 2}, + {9, 2, 5}, + }, + incX: 1, + incY: 1, + x: []float64{1, 2, 3, -4, 5}, + y: []float64{7, 8, 9}, + + Subcases: []DgemvSubcase{ + { + alpha: 0, + beta: 0, + ans: []float64{0, 0, 0}, + }, + { + alpha: 0, + beta: 1, + ans: []float64{7, 8, 9}, + }, + { + alpha: 1, + beta: 0, + ans: []float64{94.3, 40.2, 52.3}, + }, + { + alpha: 8, + beta: -6, + ans: []float64{712.4, 273.6, 364.4}, + }, + }, + }, + { + Name: "M_eq_N_Inc1_NoTrans", + tA: blas.NoTrans, + m: 3, + n: 3, + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + }, + incX: 1, + incY: 1, + x: []float64{1, 2, 3}, + y: []float64{7, 2, 2}, + + Subcases: []DgemvSubcase{ + { + alpha: 0, + beta: 0, + ans: []float64{0, 0, 0}, + }, + { + alpha: 0, + beta: 1, + ans: 
[]float64{7, 2, 2}, + }, + { + alpha: 1, + beta: 0, + ans: []float64{40.8, 43.9, 33}, + }, + { + alpha: 8, + beta: -6, + ans: []float64{40.8*8 - 6*7, 43.9*8 - 6*2, 33*8 - 6*2}, + }, + }, + }, + { + Name: "M_eq_N_Inc1_Trans", + tA: blas.Trans, + m: 3, + n: 3, + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + }, + incX: 1, + incY: 1, + x: []float64{1, 2, 3}, + y: []float64{7, 2, 2}, + + Subcases: []DgemvSubcase{ + { + alpha: 8, + beta: -6, + ans: []float64{384.4, 261.6, 270.4}, + }, + }, + }, + { + Name: "M_lt_N_Inc1_NoTrans", + tA: blas.NoTrans, + m: 3, + n: 5, + A: [][]float64{ + {4.1, 6.2, 8.1, 10, 7}, + {9.6, 3.5, 9.1, -2, 9}, + {10, 7, 3, 1, -5}, + }, + incX: 1, + incY: 1, + x: []float64{1, 2, 3, -7.6, 8.1}, + y: []float64{7, 2, 2}, + + Subcases: []DgemvSubcase{ + { + alpha: 0, + beta: 0, + ans: []float64{0, 0, 0}, + }, + { + alpha: 0, + beta: 1, + ans: []float64{7, 2, 2}, + }, + { + alpha: 1, + beta: 0, + ans: []float64{21.5, 132, -15.1}, + }, + + { + alpha: 8, + beta: -6, + ans: []float64{21.5*8 - 6*7, 132*8 - 6*2, -15.1*8 - 6*2}, + }, + }, + }, + { + Name: "M_lt_N_Inc1_Trans", + tA: blas.Trans, + m: 3, + n: 5, + A: [][]float64{ + {4.1, 6.2, 8.1, 10, 7}, + {9.6, 3.5, 9.1, -2, 9}, + {10, 7, 3, 1, -5}, + }, + incX: 1, + incY: 1, + x: []float64{1, 2, 3}, + y: []float64{7, 2, 2, -3, 5}, + + Subcases: []DgemvSubcase{ + { + alpha: 8, + beta: -6, + ans: []float64{384.4, 261.6, 270.4, 90, 50}, + }, + }, + }, + { + Name: "M_gt_N_Part1_NoTrans", + tA: blas.NoTrans, + m: 5, + n: 3, + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + {1, 1, 2}, + {9, 2, 5}, + }, + incX: 1, + incY: 2, + x: []float64{1, 2, 3}, + y: []float64{7, 100, 8, 101, 9, 102, 10, 103, 11}, + + Subcases: []DgemvSubcase{ + { + alpha: 0, + beta: 0, + ans: []float64{0, 100, 0, 101, 0, 102, 0, 103, 0}, + }, + { + alpha: 0, + beta: 1, + ans: []float64{7, 100, 8, 101, 9, 102, 10, 103, 11}, + }, + { + alpha: 1, + beta: 0, + ans: []float64{40.8, 100, 43.9, 101, 33, 
102, 9, 103, 28}, + }, + { + alpha: 8, + beta: -6, + ans: []float64{284.4, 100, 303.2, 101, 210, 102, 12, 103, 158}, + }, + }, + }, + { + Name: "M_gt_N_Part1_Trans", + tA: blas.Trans, + m: 5, + n: 3, + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + {1, 1, 2}, + {9, 2, 5}, + }, + incX: 1, + incY: 2, + x: []float64{1, 2, 3, -4, 5}, + y: []float64{7, 100, 8, 101, 9}, + + Subcases: []DgemvSubcase{ + { + alpha: 0, + beta: 0, + ans: []float64{0, 100, 0, 101, 0}, + }, + { + alpha: 0, + beta: 1, + ans: []float64{7, 100, 8, 101, 9}, + }, + { + alpha: 1, + beta: 0, + ans: []float64{94.3, 100, 40.2, 101, 52.3}, + }, + { + alpha: 8, + beta: -6, + ans: []float64{712.4, 100, 273.6, 101, 364.4}, + }, + }, + }, + { + Name: "M_gt_N_IncNot1_NoTrans", + tA: blas.NoTrans, + m: 5, + n: 3, + + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + {1, 1, 2}, + {9, 2, 5}, + }, + incX: 2, + incY: 3, + x: []float64{1, 15, 2, 150, 3}, + y: []float64{7, 2, 6, 8, -4, -5, 9, 1, 1, 10, 19, 22, 11}, + Subcases: []DgemvSubcase{ + { + alpha: 8, + beta: -6, + ans: []float64{284.4, 2, 6, 303.2, -4, -5, 210, 1, 1, 12, 19, 22, 158}, + }, + { + mulXNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{220.4, 2, 6, 311.2, -4, -5, 322, 1, 1, -4, 19, 22, 222}, + }, + { + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{182, 2, 6, 24, -4, -5, 210, 1, 1, 291.2, 19, 22, 260.4}, + }, + { + mulXNeg1: true, + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{246, 2, 6, 8, -4, -5, 322, 1, 1, 299.2, 19, 22, 196.4}, + }, + }, + }, + { + Name: "M_gt_N_IncNot1_Trans", + tA: blas.Trans, + m: 5, + n: 3, + + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + {1, 1, 2}, + {9, 2, 5}, + }, + incX: 2, + incY: 3, + x: []float64{1, 15, 2, 150, 3, 8, -3, 6, 5}, + y: []float64{7, 2, 6, 8, -4, -5, 9}, + Subcases: []DgemvSubcase{ + { + alpha: 8, + beta: -6, + ans: []float64{720.4, 2, 6, 281.6, -4, -5, 380.4}, + }, + { + mulXNeg1: true, + alpha: 8, + beta: -6, 
+ ans: []float64{219.6, 2, 6, 316, -4, -5, 195.6}, + }, + { + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{392.4, 2, 6, 281.6, -4, -5, 708.4}, + }, + { + mulXNeg1: true, + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{207.6, 2, 6, 316, -4, -5, 207.6}, + }, + }, + }, + { + Name: "M_eq_N_IncNot1_NoTrans", + tA: blas.NoTrans, + m: 3, + n: 3, + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + }, + incX: 2, + incY: 3, + x: []float64{1, 15, 2, 150, 3}, + y: []float64{7, 2, 6, 8, -4, -5, 9}, + Subcases: []DgemvSubcase{ + { + alpha: 8, + beta: -6, + ans: []float64{284.4, 2, 6, 303.2, -4, -5, 210}, + }, + { + mulXNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{220.4, 2, 6, 311.2, -4, -5, 322}, + }, + { + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{222, 2, 6, 303.2, -4, -5, 272.4}, + }, + { + mulXNeg1: true, + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{334, 2, 6, 311.2, -4, -5, 208.4}, + }, + }, + }, + { + Name: "M_eq_N_IncNot1_Trans", + tA: blas.Trans, + m: 3, + n: 3, + A: [][]float64{ + {4.1, 6.2, 8.1}, + {9.6, 3.5, 9.1}, + {10, 7, 3}, + }, + incX: 2, + incY: 3, + x: []float64{1, 15, 2, 150, 3}, + y: []float64{7, 2, 6, 8, -4, -5, 9}, + + Subcases: []DgemvSubcase{ + { + alpha: 8, + beta: -6, + ans: []float64{384.4, 2, 6, 225.6, -4, -5, 228.4}, + }, + { + mulXNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{290, 2, 6, 212.8, -4, -5, 310}, + }, + { + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{240.4, 2, 6, 225.6, -4, -5, 372.4}, + }, + { + mulXNeg1: true, + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{322, 2, 6, 212.8, -4, -5, 278}, + }, + }, + }, + { + Name: "M_lt_N_IncNot1_NoTrans", + tA: blas.NoTrans, + m: 3, + n: 5, + A: [][]float64{ + {4.1, 6.2, 8.1, 10, 11}, + {9.6, 3.5, 9.1, -3, -2}, + {10, 7, 3, -7, -4}, + }, + incX: 2, + incY: 3, + x: []float64{1, 15, 2, 150, 3, -2, -4, 8, -9}, + y: []float64{7, 2, 6, 8, -4, -5, 9}, + + Subcases: []DgemvSubcase{ + { + alpha: 8, + 
beta: -6, + ans: []float64{-827.6, 2, 6, 543.2, -4, -5, 722}, + }, + { + mulXNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{-93.2, 2, 6, -696.8, -4, -5, -1070}, + }, + { + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{734, 2, 6, 543.2, -4, -5, -839.6}, + }, + { + mulXNeg1: true, + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{-1058, 2, 6, -696.8, -4, -5, -105.2}, + }, + }, + }, + { + Name: "M_lt_N_IncNot1_Trans", + tA: blas.Trans, + m: 3, + n: 5, + A: [][]float64{ + {4.1, 6.2, 8.1, 10, 11}, + {9.6, 3.5, 9.1, -3, -2}, + {10, 7, 3, -7, -4}, + }, + incX: 2, + incY: 3, + x: []float64{1, 15, 2, 150, 3}, + y: []float64{7, 2, 6, 8, -4, -5, 9, -4, -1, -9, 1, 1, 2}, + + Subcases: []DgemvSubcase{ + { + alpha: 8, + beta: -6, + ans: []float64{384.4, 2, 6, 225.6, -4, -5, 228.4, -4, -1, -82, 1, 1, -52}, + }, + { + mulXNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{290, 2, 6, 212.8, -4, -5, 310, -4, -1, 190, 1, 1, 188}, + }, + { + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{-82, 2, 6, -184, -4, -5, 228.4, -4, -1, 327.6, 1, 1, 414.4}, + }, + { + mulXNeg1: true, + mulYNeg1: true, + alpha: 8, + beta: -6, + ans: []float64{158, 2, 6, 88, -4, -5, 310, -4, -1, 314.8, 1, 1, 320}, + }, + }, + }, + + // TODO: A can be longer than mxn. Add cases where it is longer + // TODO: x and y can also be longer. 
Add tests for these + // TODO: Add tests for dimension mismatch + // TODO: Add places with a "submatrix view", where lda != m +} + +type Dgemver interface { + Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) +} + +func DgemvTest(t *testing.T, blasser Dgemver) { + for _, test := range DgemvCases { + for i, cas := range test.Subcases { + // Test that it passes with row-major + dgemvcomp(t, test, cas, i, blasser) + + // Test the bad inputs + dgemvbad(t, test, cas, i, blasser) + } + } +} + +func dgemvcomp(t *testing.T, test DgemvCase, cas DgemvSubcase, i int, blasser Dgemver) { + x := sliceCopy(test.x) + y := sliceCopy(test.y) + a := sliceOfSliceCopy(test.A) + aFlat := flatten(a) + + lda := test.n + + incX := test.incX + if cas.mulXNeg1 { + incX *= -1 + } + incY := test.incY + if cas.mulYNeg1 { + incY *= -1 + } + + f := func() { + blasser.Dgemv(test.tA, test.m, test.n, cas.alpha, aFlat, lda, x, incX, cas.beta, y, incY) + } + if panics(f) { + t.Errorf("Test %v case %v: unexpected panic", test.Name, i) + if throwPanic { + blasser.Dgemv(test.tA, test.m, test.n, cas.alpha, aFlat, lda, x, incX, cas.beta, y, incY) + } + return + } + // Check that x and a are unchanged + if !dSliceEqual(x, test.x) { + t.Errorf("Test %v, case %v: x modified during call", test.Name, i) + } + aFlat2 := flatten(sliceOfSliceCopy(test.A)) + if !dSliceEqual(aFlat2, aFlat) { + t.Errorf("Test %v, case %v: a modified during call", test.Name, i) + } + + // Check that the answer matches + if !dSliceTolEqual(cas.ans, y) { + t.Errorf("Test %v, case %v: answer mismatch: Expected %v, Found %v", test.Name, i, cas.ans, y) + } +} + +func dgemvbad(t *testing.T, test DgemvCase, cas DgemvSubcase, i int, blasser Dgemver) { + x := sliceCopy(test.x) + y := sliceCopy(test.y) + a := sliceOfSliceCopy(test.A) + aFlatRow := flatten(a) + ldaRow := test.n + + f := func() { + blasser.Dgemv(312, test.m, test.n, cas.alpha, aFlatRow, ldaRow, x, 
test.incX, cas.beta, y, test.incY) + } + if !panics(f) { + t.Errorf("Test %v case %v: no panic for bad transpose", test.Name, i) + } + f = func() { + blasser.Dgemv(test.tA, -2, test.n, cas.alpha, aFlatRow, ldaRow, x, test.incX, cas.beta, y, test.incY) + } + if !panics(f) { + t.Errorf("Test %v case %v: no panic for m negative", test.Name, i) + } + f = func() { + blasser.Dgemv(test.tA, test.m, -4, cas.alpha, aFlatRow, ldaRow, x, test.incX, cas.beta, y, test.incY) + } + if !panics(f) { + t.Errorf("Test %v case %v: no panic for n negative", test.Name, i) + } + f = func() { + blasser.Dgemv(test.tA, test.m, test.n, cas.alpha, aFlatRow, ldaRow, x, 0, cas.beta, y, test.incY) + } + if !panics(f) { + t.Errorf("Test %v case %v: no panic for incX zero", test.Name, i) + } + f = func() { + blasser.Dgemv(test.tA, test.m, test.n, cas.alpha, aFlatRow, ldaRow, x, test.incX, cas.beta, y, 0) + } + if !panics(f) { + t.Errorf("Test %v case %v: no panic for incY zero", test.Name, i) + } + f = func() { + blasser.Dgemv(test.tA, test.m, test.n, cas.alpha, aFlatRow, ldaRow-1, x, test.incX, cas.beta, y, test.incY) + } + if !panics(f) { + t.Errorf("Test %v case %v: no panic for lda too small row major", test.Name, i) + } +} diff --git a/blas/testblas/dger.go b/blas/testblas/dger.go new file mode 100644 index 00000000..c7c87965 --- /dev/null +++ b/blas/testblas/dger.go @@ -0,0 +1,164 @@ +package testblas + +import "testing" + +type Dgerer interface { + Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) +} + +func DgerTest(t *testing.T, blasser Dgerer) { + for _, test := range []struct { + name string + a [][]float64 + m int + n int + x []float64 + y []float64 + incX int + incY int + ansAlphaEq1 []float64 + + trueAns [][]float64 + }{ + { + name: "M gt N inc 1", + m: 5, + n: 3, + a: [][]float64{ + {1.3, 2.4, 3.5}, + {2.6, 2.8, 3.3}, + {-1.3, -4.3, -9.7}, + {8, 9, -10}, + {-12, -14, -6}, + }, + x: []float64{-2, -3, 0, 1, 2}, + y: []float64{-1.1, 5, 
0}, + incX: 1, + incY: 1, + trueAns: [][]float64{{3.5, -7.6, 3.5}, {5.9, -12.2, 3.3}, {-1.3, -4.3, -9.7}, {6.9, 14, -10}, {-14.2, -4, -6}}, + }, + { + name: "M eq N inc 1", + m: 3, + n: 3, + a: [][]float64{ + {1.3, 2.4, 3.5}, + {2.6, 2.8, 3.3}, + {-1.3, -4.3, -9.7}, + }, + x: []float64{-2, -3, 0}, + y: []float64{-1.1, 5, 0}, + incX: 1, + incY: 1, + trueAns: [][]float64{{3.5, -7.6, 3.5}, {5.9, -12.2, 3.3}, {-1.3, -4.3, -9.7}}, + }, + + { + name: "M lt N inc 1", + m: 3, + n: 6, + a: [][]float64{ + {1.3, 2.4, 3.5, 4.8, 1.11, -9}, + {2.6, 2.8, 3.3, -3.4, 6.2, -8.7}, + {-1.3, -4.3, -9.7, -3.1, 8.9, 8.9}, + }, + x: []float64{-2, -3, 0}, + y: []float64{-1.1, 5, 0, 9, 19, 22}, + incX: 1, + incY: 1, + trueAns: [][]float64{{3.5, -7.6, 3.5, -13.2, -36.89, -53}, {5.9, -12.2, 3.3, -30.4, -50.8, -74.7}, {-1.3, -4.3, -9.7, -3.1, 8.9, 8.9}}, + }, + { + name: "M gt N inc not 1", + m: 5, + n: 3, + a: [][]float64{ + {1.3, 2.4, 3.5}, + {2.6, 2.8, 3.3}, + {-1.3, -4.3, -9.7}, + {8, 9, -10}, + {-12, -14, -6}, + }, + x: []float64{-2, -3, 0, 1, 2, 6, 0, 9, 7}, + y: []float64{-1.1, 5, 0, 8, 7, -5, 7}, + incX: 2, + incY: 3, + trueAns: [][]float64{{3.5, -13.6, -10.5}, {2.6, 2.8, 3.3}, {-3.5, 11.7, 4.3}, {8, 9, -10}, {-19.700000000000003, 42, 43}}, + }, + { + name: "M eq N inc not 1", + m: 3, + n: 3, + a: [][]float64{ + {1.3, 2.4, 3.5}, + {2.6, 2.8, 3.3}, + {-1.3, -4.3, -9.7}, + }, + x: []float64{-2, -3, 0, 8, 7, -9, 7, -6, 12, 6, 6, 6, -11}, + y: []float64{-1.1, 5, 0, 0, 9, 8, 6}, + incX: 4, + incY: 3, + trueAns: [][]float64{{3.5, 2.4, -8.5}, {-5.1, 2.8, 45.3}, {-14.5, -4.3, 62.3}}, + }, + { + name: "M lt N inc not 1", + m: 3, + n: 6, + a: [][]float64{ + {1.3, 2.4, 3.5, 4.8, 1.11, -9}, + {2.6, 2.8, 3.3, -3.4, 6.2, -8.7}, + {-1.3, -4.3, -9.7, -3.1, 8.9, 8.9}, + }, + x: []float64{-2, -3, 0, 0, 8, 0, 9, -3}, + y: []float64{-1.1, 5, 0, 9, 19, 22, 11, -8.11, -9.22, 9.87, 7}, + incX: 3, + incY: 2, + trueAns: [][]float64{{3.5, 2.4, -34.5, -17.2, 19.55, -23}, {2.6, 2.8, 3.3, -3.4, 6.2, -8.7}, {-11.2, 
-4.3, 161.3, 95.9, -74.08, 71.9}}, + }, + } { + // TODO: Add tests where a is longer + // TODO: Add panic tests + // TODO: Add negative increment tests + + x := sliceCopy(test.x) + y := sliceCopy(test.y) + + a := sliceOfSliceCopy(test.a) + + // Test with row major + alpha := 1.0 + aFlat := flatten(a) + blasser.Dger(test.m, test.n, alpha, x, test.incX, y, test.incY, aFlat, test.n) + ans := unflatten(aFlat, test.m, test.n) + dgercomp(t, x, test.x, y, test.y, ans, test.trueAns, test.name+" row maj") + + // Test with different alpha + alpha = 4.0 + aFlat = flatten(a) + blasser.Dger(test.m, test.n, alpha, x, test.incX, y, test.incY, aFlat, test.n) + ans = unflatten(aFlat, test.m, test.n) + trueCopy := sliceOfSliceCopy(test.trueAns) + for i := range trueCopy { + for j := range trueCopy[i] { + trueCopy[i][j] = alpha*(trueCopy[i][j]-a[i][j]) + a[i][j] + } + } + dgercomp(t, x, test.x, y, test.y, ans, trueCopy, test.name+" row maj alpha") + } +} + +func dgercomp(t *testing.T, x, xCopy, y, yCopy []float64, ans [][]float64, trueAns [][]float64, name string) { + if !dSliceEqual(x, xCopy) { + t.Errorf("case %v: x modified during call to dger", name) + } + if !dSliceEqual(y, yCopy) { + t.Errorf("case %v: x modified during call to dger", name) + } + + for i := range ans { + if !dSliceTolEqual(ans[i], trueAns[i]) { + t.Errorf("case %v: answer mismatch. 
Expected %v, Found %v", name, trueAns, ans) + break + } + } +} diff --git a/blas/testblas/dsbmv.go b/blas/testblas/dsbmv.go new file mode 100644 index 00000000..f4579bf8 --- /dev/null +++ b/blas/testblas/dsbmv.go @@ -0,0 +1,83 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" +) + +type Dsbmver interface { + Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) +} + +func DsbmvTest(t *testing.T, blasser Dsbmver) { + for i, test := range []struct { + ul blas.Uplo + n int + k int + alpha float64 + beta float64 + a [][]float64 + x []float64 + y []float64 + + ans []float64 + }{ + { + ul: blas.Upper, + n: 4, + k: 2, + alpha: 2, + beta: 3, + a: [][]float64{ + {7, 8, 2, 0}, + {0, 8, 2, -3}, + {0, 0, 3, 6}, + {0, 0, 0, 9}, + }, + x: []float64{1, 2, 3, 4}, + y: []float64{-1, -2, -3, -4}, + ans: []float64{55, 30, 69, 84}, + }, + { + ul: blas.Lower, + n: 4, + k: 2, + alpha: 2, + beta: 3, + a: [][]float64{ + {7, 0, 0, 0}, + {8, 8, 0, 0}, + {2, 2, 3, 0}, + {0, -3, 6, 9}, + }, + x: []float64{1, 2, 3, 4}, + y: []float64{-1, -2, -3, -4}, + ans: []float64{55, 30, 69, 84}, + }, + } { + extra := 0 + var aFlat []float64 + if test.ul == blas.Upper { + aFlat = flattenBanded(test.a, test.k, 0) + } else { + aFlat = flattenBanded(test.a, 0, test.k) + } + incTest := func(incX, incY, extra int) { + xnew := makeIncremented(test.x, incX, extra) + ynew := makeIncremented(test.y, incY, extra) + ans := makeIncremented(test.ans, incY, extra) + blasser.Dsbmv(test.ul, test.n, test.k, test.alpha, aFlat, test.k+1, xnew, incX, test.beta, ynew, incY) + if !dSliceTolEqual(ans, ynew) { + t.Errorf("Case %v: Want %v, got %v", i, ans, ynew) + } + } + incTest(1, 1, extra) + incTest(1, 3, extra) + incTest(1, -3, extra) + incTest(2, 3, extra) + incTest(2, -3, extra) + incTest(3, 2, extra) + incTest(-3, 2, extra) + } +} diff --git a/blas/testblas/dspmv.go b/blas/testblas/dspmv.go new file mode 100644 index 
00000000..7100bbb4 --- /dev/null +++ b/blas/testblas/dspmv.go @@ -0,0 +1,73 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dspmver interface { + Dspmv(ul blas.Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int) +} + +func DspmvTest(t *testing.T, blasser Dspmver) { + for i, test := range []struct { + ul blas.Uplo + n int + a [][]float64 + x []float64 + y []float64 + alpha float64 + beta float64 + ans []float64 + }{ + { + ul: blas.Upper, + n: 3, + a: [][]float64{ + {5, 6, 7}, + {0, 8, 10}, + {0, 0, 13}, + }, + x: []float64{3, 4, 5}, + y: []float64{6, 7, 8}, + alpha: 2.1, + beta: -3, + ans: []float64{137.4, 189, 240.6}, + }, + { + ul: blas.Lower, + n: 3, + a: [][]float64{ + {5, 0, 0}, + {6, 8, 0}, + {7, 10, 13}, + }, + x: []float64{3, 4, 5}, + y: []float64{6, 7, 8}, + alpha: 2.1, + beta: -3, + ans: []float64{137.4, 189, 240.6}, + }, + } { + incTest := func(incX, incY, extra int) { + x := makeIncremented(test.x, incX, extra) + y := makeIncremented(test.y, incY, extra) + aFlat := flattenTriangular(test.a, test.ul) + ans := makeIncremented(test.ans, incY, extra) + + blasser.Dspmv(test.ul, test.n, test.alpha, aFlat, x, incX, test.beta, y, incY) + if !floats.EqualApprox(ans, y, 1e-14) { + t.Errorf("Case %v, incX=%v, incY=%v: Want %v, got %v.", i, incX, incY, ans, y) + } + } + incTest(1, 1, 0) + incTest(2, 3, 0) + incTest(3, 2, 0) + incTest(-3, 2, 0) + incTest(-2, 4, 0) + incTest(2, -1, 0) + incTest(-3, -4, 3) + } +} diff --git a/blas/testblas/dspr.go b/blas/testblas/dspr.go new file mode 100644 index 00000000..aa31731d --- /dev/null +++ b/blas/testblas/dspr.go @@ -0,0 +1,71 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" +) + +type Dsprer interface { + Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64) +} + +func DsprTest(t *testing.T, blasser Dsprer) { + for i, test := range []struct { + ul blas.Uplo + n int 
+ a [][]float64 + x []float64 + alpha float64 + ans [][]float64 + }{ + { + ul: blas.Upper, + n: 4, + a: [][]float64{ + {10, 2, 0, 1}, + {0, 1, 2, 3}, + {0, 0, 9, 15}, + {0, 0, 0, -6}, + }, + x: []float64{1, 2, 0, 5}, + alpha: 8, + ans: [][]float64{ + {18, 18, 0, 41}, + {0, 33, 2, 83}, + {0, 0, 9, 15}, + {0, 0, 0, 194}, + }, + }, + { + ul: blas.Lower, + n: 3, + a: [][]float64{ + {10, 2, 0}, + {4, 1, 2}, + {2, 7, 9}, + }, + x: []float64{3, 0, 5}, + alpha: 8, + ans: [][]float64{ + {82, 2, 0}, + {4, 1, 2}, + {122, 7, 209}, + }, + }, + } { + incTest := func(incX, extra int) { + xnew := makeIncremented(test.x, incX, extra) + aFlat := flattenTriangular(test.a, test.ul) + ans := flattenTriangular(test.ans, test.ul) + blasser.Dspr(test.ul, test.n, test.alpha, xnew, incX, aFlat) + if !dSliceTolEqual(aFlat, ans) { + t.Errorf("Case %v, idx %v: Want %v, got %v.", i, incX, ans, aFlat) + } + } + incTest(1, 3) + incTest(1, 0) + incTest(3, 2) + incTest(-2, 2) + } +} diff --git a/blas/testblas/dspr2.go b/blas/testblas/dspr2.go new file mode 100644 index 00000000..fb657d63 --- /dev/null +++ b/blas/testblas/dspr2.go @@ -0,0 +1,76 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dspr2er interface { + Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64) +} + +func Dspr2Test(t *testing.T, blasser Dspr2er) { + for i, test := range []struct { + n int + a [][]float64 + ul blas.Uplo + x []float64 + y []float64 + alpha float64 + ans [][]float64 + }{ + { + n: 3, + a: [][]float64{ + {7, 2, 4}, + {0, 3, 5}, + {0, 0, 6}, + }, + x: []float64{2, 3, 4}, + y: []float64{5, 6, 7}, + alpha: 2, + ul: blas.Upper, + ans: [][]float64{ + {47, 56, 72}, + {0, 75, 95}, + {0, 0, 118}, + }, + }, + { + n: 3, + a: [][]float64{ + {7, 0, 0}, + {2, 3, 0}, + {4, 5, 6}, + }, + x: []float64{2, 3, 4}, + y: []float64{5, 6, 7}, + alpha: 2, + ul: blas.Lower, + ans: [][]float64{ + {47, 0, 0}, + {56, 75, 0}, + {72, 
95, 118}, + }, + }, + } { + incTest := func(incX, incY, extra int) { + aFlat := flattenTriangular(test.a, test.ul) + x := makeIncremented(test.x, incX, extra) + y := makeIncremented(test.y, incY, extra) + blasser.Dspr2(test.ul, test.n, test.alpha, x, incX, y, incY, aFlat) + ansFlat := flattenTriangular(test.ans, test.ul) + if !floats.EqualApprox(aFlat, ansFlat, 1e-14) { + t.Errorf("Case %v, incX = %v, incY = %v. Want %v, got %v.", i, incX, incY, ansFlat, aFlat) + } + } + incTest(1, 1, 0) + incTest(-2, 1, 0) + incTest(-2, 3, 0) + incTest(2, -3, 0) + incTest(3, -2, 0) + incTest(-3, -4, 0) + } +} diff --git a/blas/testblas/dsymm.go b/blas/testblas/dsymm.go new file mode 100644 index 00000000..728d0522 --- /dev/null +++ b/blas/testblas/dsymm.go @@ -0,0 +1,277 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dsymmer interface { + Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) +} + +func DsymmTest(t *testing.T, blasser Dsymmer) { + for i, test := range []struct { + m int + n int + side blas.Side + ul blas.Uplo + a [][]float64 + b [][]float64 + c [][]float64 + alpha float64 + beta float64 + ans [][]float64 + }{ + { + side: blas.Left, + ul: blas.Upper, + m: 3, + n: 4, + a: [][]float64{ + {2, 3, 4}, + {0, 6, 7}, + {0, 0, 10}, + }, + b: [][]float64{ + {2, 3, 4, 8}, + {5, 6, 7, 15}, + {8, 9, 10, 20}, + }, + c: [][]float64{ + {8, 12, 2, 1}, + {9, 12, 9, 9}, + {12, 1, -1, 5}, + }, + alpha: 2, + beta: 3, + ans: [][]float64{ + {126, 156, 144, 285}, + {211, 252, 275, 535}, + {282, 291, 327, 689}, + }, + }, + { + side: blas.Left, + ul: blas.Upper, + m: 4, + n: 3, + a: [][]float64{ + {2, 3, 4, 8}, + {0, 6, 7, 9}, + {0, 0, 10, 10}, + {0, 0, 0, 11}, + }, + b: [][]float64{ + {2, 3, 4}, + {5, 6, 7}, + {8, 9, 10}, + {2, 1, 1}, + }, + c: [][]float64{ + {8, 12, 2}, + {9, 12, 9}, + {12, 1, -1}, + {1, 9, 5}, + }, + alpha: 2, + beta: 3, + 
ans: [][]float64{ + {158, 172, 160}, + {247, 270, 293}, + {322, 311, 347}, + {329, 385, 427}, + }, + }, + { + side: blas.Left, + ul: blas.Lower, + m: 3, + n: 4, + a: [][]float64{ + {2, 0, 0}, + {3, 6, 0}, + {4, 7, 10}, + }, + b: [][]float64{ + {2, 3, 4, 8}, + {5, 6, 7, 15}, + {8, 9, 10, 20}, + }, + c: [][]float64{ + {8, 12, 2, 1}, + {9, 12, 9, 9}, + {12, 1, -1, 5}, + }, + alpha: 2, + beta: 3, + ans: [][]float64{ + {126, 156, 144, 285}, + {211, 252, 275, 535}, + {282, 291, 327, 689}, + }, + }, + { + side: blas.Left, + ul: blas.Lower, + m: 4, + n: 3, + a: [][]float64{ + {2, 0, 0, 0}, + {3, 6, 0, 0}, + {4, 7, 10, 0}, + {8, 9, 10, 11}, + }, + b: [][]float64{ + {2, 3, 4}, + {5, 6, 7}, + {8, 9, 10}, + {2, 1, 1}, + }, + c: [][]float64{ + {8, 12, 2}, + {9, 12, 9}, + {12, 1, -1}, + {1, 9, 5}, + }, + alpha: 2, + beta: 3, + ans: [][]float64{ + {158, 172, 160}, + {247, 270, 293}, + {322, 311, 347}, + {329, 385, 427}, + }, + }, + { + side: blas.Right, + ul: blas.Upper, + m: 3, + n: 4, + a: [][]float64{ + {2, 0, 0, 0}, + {3, 6, 0, 0}, + {4, 7, 10, 0}, + {3, 4, 5, 6}, + }, + b: [][]float64{ + {2, 3, 4, 9}, + {5, 6, 7, -3}, + {8, 9, 10, -2}, + }, + c: [][]float64{ + {8, 12, 2, 10}, + {9, 12, 9, 10}, + {12, 1, -1, 10}, + }, + alpha: 2, + beta: 3, + ans: [][]float64{ + {32, 72, 86, 138}, + {47, 108, 167, -6}, + {68, 111, 197, 6}, + }, + }, + { + side: blas.Right, + ul: blas.Upper, + m: 4, + n: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 6, 0}, + {4, 7, 10}, + }, + b: [][]float64{ + {2, 3, 4}, + {5, 6, 7}, + {8, 9, 10}, + {2, 1, 1}, + }, + c: [][]float64{ + {8, 12, 2}, + {9, 12, 9}, + {12, 1, -1}, + {1, 9, 5}, + }, + alpha: 2, + beta: 3, + ans: [][]float64{ + {32, 72, 86}, + {47, 108, 167}, + {68, 111, 197}, + {11, 39, 35}, + }, + }, + { + side: blas.Right, + ul: blas.Lower, + m: 3, + n: 4, + a: [][]float64{ + {2, 0, 0, 0}, + {3, 6, 0, 0}, + {4, 7, 10, 0}, + {3, 4, 5, 6}, + }, + b: [][]float64{ + {2, 3, 4, 2}, + {5, 6, 7, 1}, + {8, 9, 10, 1}, + }, + c: [][]float64{ + {8, 12, 2, 1}, + {9, 
12, 9, 9}, + {12, 1, -1, 5}, + }, + alpha: 2, + beta: 3, + ans: [][]float64{ + {94, 156, 164, 103}, + {145, 244, 301, 187}, + {208, 307, 397, 247}, + }, + }, + { + side: blas.Right, + ul: blas.Lower, + m: 4, + n: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 6, 0}, + {4, 7, 10}, + }, + b: [][]float64{ + {2, 3, 4}, + {5, 6, 7}, + {8, 9, 10}, + {2, 1, 1}, + }, + c: [][]float64{ + {8, 12, 2}, + {9, 12, 9}, + {12, 1, -1}, + {1, 9, 5}, + }, + alpha: 2, + beta: 3, + ans: [][]float64{ + {82, 140, 144}, + {139, 236, 291}, + {202, 299, 387}, + {25, 65, 65}, + }, + }, + } { + aFlat := flatten(test.a) + bFlat := flatten(test.b) + cFlat := flatten(test.c) + ansFlat := flatten(test.ans) + blasser.Dsymm(test.side, test.ul, test.m, test.n, test.alpha, aFlat, len(test.a[0]), bFlat, test.n, test.beta, cFlat, test.n) + if !floats.EqualApprox(cFlat, ansFlat, 1e-14) { + t.Errorf("Case %v: Want %v, got %v.", i, ansFlat, cFlat) + } + } +} diff --git a/blas/testblas/dsymv.go b/blas/testblas/dsymv.go new file mode 100644 index 00000000..f3b9e5d4 --- /dev/null +++ b/blas/testblas/dsymv.go @@ -0,0 +1,73 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dsymver interface { + Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) +} + +func DsymvTest(t *testing.T, blasser Dsymver) { + for i, test := range []struct { + ul blas.Uplo + n int + a [][]float64 + x []float64 + y []float64 + alpha float64 + beta float64 + ans []float64 + }{ + { + ul: blas.Upper, + n: 3, + a: [][]float64{ + {5, 6, 7}, + {0, 8, 10}, + {0, 0, 13}, + }, + x: []float64{3, 4, 5}, + y: []float64{6, 7, 8}, + alpha: 2.1, + beta: -3, + ans: []float64{137.4, 189, 240.6}, + }, + { + ul: blas.Lower, + n: 3, + a: [][]float64{ + {5, 0, 0}, + {6, 8, 0}, + {7, 10, 13}, + }, + x: []float64{3, 4, 5}, + y: []float64{6, 7, 8}, + alpha: 2.1, + beta: -3, + ans: []float64{137.4, 189, 240.6}, + }, + } { + incTest := 
func(incX, incY, extra int) { + x := makeIncremented(test.x, incX, extra) + y := makeIncremented(test.y, incY, extra) + aFlat := flatten(test.a) + ans := makeIncremented(test.ans, incY, extra) + + blasser.Dsymv(test.ul, test.n, test.alpha, aFlat, test.n, x, incX, test.beta, y, incY) + if !floats.EqualApprox(ans, y, 1e-14) { + t.Errorf("Case %v, incX=%v, incY=%v: Want %v, got %v.", i, incX, incY, ans, y) + } + } + incTest(1, 1, 0) + incTest(2, 3, 0) + incTest(3, 2, 0) + incTest(-3, 2, 0) + incTest(-2, 4, 0) + incTest(2, -1, 0) + incTest(-3, -4, 3) + } +} diff --git a/blas/testblas/dsyr.go b/blas/testblas/dsyr.go new file mode 100644 index 00000000..4b4959ff --- /dev/null +++ b/blas/testblas/dsyr.go @@ -0,0 +1,72 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" +) + +type Dsyrer interface { + Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) +} + +func DsyrTest(t *testing.T, blasser Dsyrer) { + for i, test := range []struct { + ul blas.Uplo + n int + a [][]float64 + x []float64 + alpha float64 + ans [][]float64 + }{ + { + ul: blas.Upper, + n: 4, + a: [][]float64{ + {10, 2, 0, 1}, + {0, 1, 2, 3}, + {0, 0, 9, 15}, + {0, 0, 0, -6}, + }, + x: []float64{1, 2, 0, 5}, + alpha: 8, + ans: [][]float64{ + {18, 18, 0, 41}, + {0, 33, 2, 83}, + {0, 0, 9, 15}, + {0, 0, 0, 194}, + }, + }, + { + ul: blas.Lower, + n: 3, + a: [][]float64{ + {10, 2, 0}, + {4, 1, 2}, + {2, 7, 9}, + }, + x: []float64{3, 0, 5}, + alpha: 8, + ans: [][]float64{ + {82, 2, 0}, + {4, 1, 2}, + {122, 7, 209}, + }, + }, + } { + incTest := func(incX, extra int) { + xnew := makeIncremented(test.x, incX, extra) + aFlat := flatten(test.a) + ans := flatten(test.ans) + lda := test.n + blasser.Dsyr(test.ul, test.n, test.alpha, xnew, incX, aFlat, lda) + if !dSliceTolEqual(aFlat, ans) { + t.Errorf("Case %v, idx %v: Want %v, got %v.", i, incX, ans, aFlat) + } + } + incTest(1, 3) + incTest(1, 0) + incTest(3, 2) + incTest(-2, 2) + } +} diff --git 
a/blas/testblas/dsyr2.go b/blas/testblas/dsyr2.go new file mode 100644 index 00000000..864e8548 --- /dev/null +++ b/blas/testblas/dsyr2.go @@ -0,0 +1,76 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dsyr2er interface { + Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) +} + +func Dsyr2Test(t *testing.T, blasser Dsyr2er) { + for i, test := range []struct { + n int + a [][]float64 + ul blas.Uplo + x []float64 + y []float64 + alpha float64 + ans [][]float64 + }{ + { + n: 3, + a: [][]float64{ + {7, 2, 4}, + {0, 3, 5}, + {0, 0, 6}, + }, + x: []float64{2, 3, 4}, + y: []float64{5, 6, 7}, + alpha: 2, + ul: blas.Upper, + ans: [][]float64{ + {47, 56, 72}, + {0, 75, 95}, + {0, 0, 118}, + }, + }, + { + n: 3, + a: [][]float64{ + {7, 0, 0}, + {2, 3, 0}, + {4, 5, 6}, + }, + x: []float64{2, 3, 4}, + y: []float64{5, 6, 7}, + alpha: 2, + ul: blas.Lower, + ans: [][]float64{ + {47, 0, 0}, + {56, 75, 0}, + {72, 95, 118}, + }, + }, + } { + incTest := func(incX, incY, extra int) { + aFlat := flatten(test.a) + x := makeIncremented(test.x, incX, extra) + y := makeIncremented(test.y, incY, extra) + blasser.Dsyr2(test.ul, test.n, test.alpha, x, incX, y, incY, aFlat, test.n) + ansFlat := flatten(test.ans) + if !floats.EqualApprox(aFlat, ansFlat, 1e-14) { + t.Errorf("Case %v, incX = %v, incY = %v. 
Want %v, got %v.", i, incX, incY, ansFlat, aFlat) + } + } + incTest(1, 1, 0) + incTest(-2, 1, 0) + incTest(-2, 3, 0) + incTest(2, -3, 0) + incTest(3, -2, 0) + incTest(-3, -4, 0) + } +} diff --git a/blas/testblas/dsyr2k.go b/blas/testblas/dsyr2k.go new file mode 100644 index 00000000..f1e7de68 --- /dev/null +++ b/blas/testblas/dsyr2k.go @@ -0,0 +1,201 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dsyr2ker interface { + Dsyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) +} + +func Dsyr2kTest(t *testing.T, blasser Dsyr2ker) { + for i, test := range []struct { + ul blas.Uplo + tA blas.Transpose + n int + k int + alpha float64 + a [][]float64 + b [][]float64 + c [][]float64 + beta float64 + ans [][]float64 + }{ + { + ul: blas.Upper, + tA: blas.NoTrans, + n: 3, + k: 2, + alpha: 0, + a: [][]float64{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + b: [][]float64{ + {7, 8}, + {9, 10}, + {11, 12}, + }, + c: [][]float64{ + {1, 2, 3}, + {0, 5, 6}, + {0, 0, 9}, + }, + beta: 2, + ans: [][]float64{ + {2, 4, 6}, + {0, 10, 12}, + {0, 0, 18}, + }, + }, + { + ul: blas.Lower, + tA: blas.NoTrans, + n: 3, + k: 2, + alpha: 0, + a: [][]float64{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + b: [][]float64{ + {7, 8}, + {9, 10}, + {11, 12}, + }, + c: [][]float64{ + {1, 0, 0}, + {2, 3, 0}, + {4, 5, 6}, + }, + beta: 2, + ans: [][]float64{ + {2, 0, 0}, + {4, 6, 0}, + {8, 10, 12}, + }, + }, + { + ul: blas.Upper, + tA: blas.NoTrans, + n: 3, + k: 2, + alpha: 3, + a: [][]float64{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + b: [][]float64{ + {7, 8}, + {9, 10}, + {11, 12}, + }, + c: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + beta: 2, + ans: [][]float64{ + {140, 250, 360}, + {0, 410, 568}, + {0, 0, 774}, + }, + }, + { + ul: blas.Lower, + tA: blas.NoTrans, + n: 3, + k: 2, + alpha: 3, + a: [][]float64{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + b: [][]float64{ + 
{7, 8}, + {9, 10}, + {11, 12}, + }, + c: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + beta: 2, + ans: [][]float64{ + {140, 0, 0}, + {250, 410, 0}, + {360, 568, 774}, + }, + }, + { + ul: blas.Upper, + tA: blas.Trans, + n: 3, + k: 2, + alpha: 3, + a: [][]float64{ + {1, 3, 5}, + {2, 4, 6}, + }, + b: [][]float64{ + {7, 9, 11}, + {8, 10, 12}, + }, + c: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + beta: 2, + ans: [][]float64{ + {140, 250, 360}, + {0, 410, 568}, + {0, 0, 774}, + }, + }, + { + ul: blas.Lower, + tA: blas.Trans, + n: 3, + k: 2, + alpha: 3, + a: [][]float64{ + {1, 3, 5}, + {2, 4, 6}, + }, + b: [][]float64{ + {7, 9, 11}, + {8, 10, 12}, + }, + c: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + beta: 2, + ans: [][]float64{ + {140, 0, 0}, + {250, 410, 0}, + {360, 568, 774}, + }, + }, + } { + aFlat := flatten(test.a) + bFlat := flatten(test.b) + cFlat := flatten(test.c) + ansFlat := flatten(test.ans) + blasser.Dsyr2k(test.ul, test.tA, test.n, test.k, test.alpha, aFlat, len(test.a[0]), bFlat, len(test.b[0]), test.beta, cFlat, len(test.c[0])) + if !floats.EqualApprox(ansFlat, cFlat, 1e-14) { + t.Errorf("Case %v. 
Want %v, got %v.", i, ansFlat, cFlat) + } + } +} diff --git a/blas/testblas/dsyrk.go b/blas/testblas/dsyrk.go new file mode 100644 index 00000000..23e49694 --- /dev/null +++ b/blas/testblas/dsyrk.go @@ -0,0 +1,171 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dsyker interface { + Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) +} + +func DsyrkTest(t *testing.T, blasser Dsyker) { + for i, test := range []struct { + ul blas.Uplo + tA blas.Transpose + n int + k int + alpha float64 + a [][]float64 + c [][]float64 + beta float64 + ans [][]float64 + }{ + { + ul: blas.Upper, + tA: blas.NoTrans, + n: 3, + k: 2, + alpha: 0, + a: [][]float64{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + c: [][]float64{ + {1, 2, 3}, + {0, 5, 6}, + {0, 0, 9}, + }, + beta: 2, + ans: [][]float64{ + {2, 4, 6}, + {0, 10, 12}, + {0, 0, 18}, + }, + }, + { + ul: blas.Lower, + tA: blas.NoTrans, + n: 3, + k: 2, + alpha: 0, + a: [][]float64{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + c: [][]float64{ + {1, 0, 0}, + {2, 3, 0}, + {4, 5, 6}, + }, + beta: 2, + ans: [][]float64{ + {2, 0, 0}, + {4, 6, 0}, + {8, 10, 12}, + }, + }, + { + ul: blas.Upper, + tA: blas.NoTrans, + n: 3, + k: 2, + alpha: 3, + a: [][]float64{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + c: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + beta: 2, + ans: [][]float64{ + {17, 37, 57}, + {0, 83, 127}, + {0, 0, 195}, + }, + }, + { + ul: blas.Lower, + tA: blas.NoTrans, + n: 3, + k: 2, + alpha: 3, + a: [][]float64{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + c: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + beta: 2, + ans: [][]float64{ + {17, 0, 0}, + {37, 83, 0}, + {57, 127, 195}, + }, + }, + { + ul: blas.Upper, + tA: blas.Trans, + n: 3, + k: 2, + alpha: 3, + a: [][]float64{ + {1, 3, 5}, + {2, 4, 6}, + }, + c: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + beta: 2, + ans: [][]float64{ + {17, 37, 57}, 
+ {0, 83, 127}, + {0, 0, 195}, + }, + }, + { + ul: blas.Lower, + tA: blas.Trans, + n: 3, + k: 2, + alpha: 3, + a: [][]float64{ + {1, 3, 5}, + {2, 4, 6}, + }, + c: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + beta: 2, + ans: [][]float64{ + {17, 0, 0}, + {37, 83, 0}, + {57, 127, 195}, + }, + }, + } { + aFlat := flatten(test.a) + cFlat := flatten(test.c) + ansFlat := flatten(test.ans) + blasser.Dsyrk(test.ul, test.tA, test.n, test.k, test.alpha, aFlat, len(test.a[0]), test.beta, cFlat, len(test.c[0])) + if !floats.EqualApprox(ansFlat, cFlat, 1e-14) { + t.Errorf("Case %v. Want %v, got %v.", i, ansFlat, cFlat) + } + } +} diff --git a/blas/testblas/dtbmv.go b/blas/testblas/dtbmv.go new file mode 100644 index 00000000..497aaa0c --- /dev/null +++ b/blas/testblas/dtbmv.go @@ -0,0 +1,123 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" +) + +type Dtbmver interface { + Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) +} + +func DtbmvTest(t *testing.T, blasser Dtbmver) { + for i, test := range []struct { + ul blas.Uplo + tA blas.Transpose + d blas.Diag + n int + k int + a [][]float64 + x []float64 + ans []float64 + }{ + { + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + n: 3, + k: 1, + a: [][]float64{ + {1, 2, 0}, + {0, 1, 4}, + {0, 0, 1}, + }, + x: []float64{2, 3, 4}, + ans: []float64{8, 19, 4}, + }, + { + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + n: 5, + k: 1, + a: [][]float64{ + {1, 3, 0, 0, 0}, + {0, 6, 7, 0, 0}, + {0, 0, 2, 1, 0}, + {0, 0, 0, 12, 3}, + {0, 0, 0, 0, -1}, + }, + x: []float64{1, 2, 3, 4, 5}, + ans: []float64{7, 33, 10, 63, -5}, + }, + { + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + n: 5, + k: 1, + a: [][]float64{ + {7, 0, 0, 0, 0}, + {3, 6, 0, 0, 0}, + {0, 7, 2, 0, 0}, + {0, 0, 1, 12, 0}, + {0, 0, 0, 3, -1}, + }, + x: []float64{1, 2, 3, 4, 5}, + ans: []float64{7, 15, 20, 51, 7}, + }, + { + ul: blas.Upper, + tA: blas.Trans, + d: 
blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {7, 3, 9, 0, 0}, + {0, 6, 7, 10, 0}, + {0, 0, 2, 1, 11}, + {0, 0, 0, 12, 3}, + {0, 0, 0, 0, -1}, + }, + x: []float64{1, 2, 3, 4, 5}, + ans: []float64{7, 15, 29, 71, 40}, + }, + { + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {7, 0, 0, 0, 0}, + {3, 6, 0, 0, 0}, + {9, 7, 2, 0, 0}, + {0, 10, 1, 12, 0}, + {0, 0, 11, 3, -1}, + }, + x: []float64{1, 2, 3, 4, 5}, + ans: []float64{40, 73, 65, 63, -5}, + }, + } { + extra := 0 + var aFlat []float64 + if test.ul == blas.Upper { + aFlat = flattenBanded(test.a, test.k, 0) + } else { + aFlat = flattenBanded(test.a, 0, test.k) + } + incTest := func(incX, extra int) { + xnew := makeIncremented(test.x, incX, extra) + ans := makeIncremented(test.ans, incX, extra) + lda := test.k + 1 + blasser.Dtbmv(test.ul, test.tA, test.d, test.n, test.k, aFlat, lda, xnew, incX) + if !dSliceTolEqual(ans, xnew) { + t.Errorf("Case %v, Inc %v: Want %v, got %v", i, incX, ans, xnew) + } + } + incTest(1, extra) + incTest(3, extra) + incTest(-2, extra) + } +} diff --git a/blas/testblas/dtbsv.go b/blas/testblas/dtbsv.go new file mode 100644 index 00000000..1e08a8d7 --- /dev/null +++ b/blas/testblas/dtbsv.go @@ -0,0 +1,256 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" +) + +type Dtbsver interface { + Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) + Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) +} + +func DtbsvTest(t *testing.T, blasser Dtbsver) { + for i, test := range []struct { + ul blas.Uplo + tA blas.Transpose + d blas.Diag + n, k int + a [][]float64 + lda int + x []float64 + incX int + ans []float64 + }{ + { + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + n: 5, + k: 1, + a: [][]float64{ + {1, 3, 0, 0, 0}, + {0, 6, 7, 0, 0}, + {0, 0, 2, 1, 0}, + {0, 0, 0, 12, 3}, + {0, 0, 0, 0, -1}, + }, + x: []float64{1, 2, 
3, 4, 5}, + incX: 1, + ans: []float64{2.479166666666667, -0.493055555555556, 0.708333333333333, 1.583333333333333, -5.000000000000000}, + }, + { + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {1, 3, 5, 0, 0}, + {0, 6, 7, 5, 0}, + {0, 0, 2, 1, 5}, + {0, 0, 0, 12, 3}, + {0, 0, 0, 0, -1}, + }, + x: []float64{1, 2, 3, 4, 5}, + incX: 1, + ans: []float64{-15.854166666666664, -16.395833333333336, 13.208333333333334, 1.583333333333333, -5.000000000000000}, + }, + { + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + n: 5, + k: 1, + a: [][]float64{ + {1, 3, 0, 0, 0}, + {0, 6, 7, 0, 0}, + {0, 0, 2, 1, 0}, + {0, 0, 0, 12, 3}, + {0, 0, 0, 0, -1}, + }, + x: []float64{1, -101, 2, -201, 3, -301, 4, -401, 5, -501, -601, -701}, + incX: 2, + ans: []float64{2.479166666666667, -101, -0.493055555555556, -201, 0.708333333333333, -301, 1.583333333333333, -401, -5.000000000000000, -501, -601, -701}, + }, + { + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {1, 3, 5, 0, 0}, + {0, 6, 7, 5, 0}, + {0, 0, 2, 1, 5}, + {0, 0, 0, 12, 3}, + {0, 0, 0, 0, -1}, + }, + x: []float64{1, -101, 2, -201, 3, -301, 4, -401, 5, -501, -601, -701}, + incX: 2, + ans: []float64{-15.854166666666664, -101, -16.395833333333336, -201, 13.208333333333334, -301, 1.583333333333333, -401, -5.000000000000000, -501, -601, -701}, + }, + { + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {1, 0, 0, 0, 0}, + {3, 6, 0, 0, 0}, + {5, 7, 2, 0, 0}, + {0, 5, 1, 12, 0}, + {0, 0, 5, 3, -1}, + }, + x: []float64{1, 2, 3, 4, 5}, + incX: 1, + ans: []float64{1, -0.166666666666667, -0.416666666666667, 0.437500000000000, -5.770833333333334}, + }, + { + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {1, 0, 0, 0, 0}, + {3, 6, 0, 0, 0}, + {5, 7, 2, 0, 0}, + {0, 5, 1, 12, 0}, + {0, 0, 5, 3, -1}, + }, + x: []float64{1, -101, 2, -201, 3, -301, 4, -401, 5, -501, 
-601, -701}, + incX: 2, + ans: []float64{1, -101, -0.166666666666667, -201, -0.416666666666667, -301, 0.437500000000000, -401, -5.770833333333334, -501, -601, -701}, + }, + { + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {1, 3, 5, 0, 0}, + {0, 6, 7, 5, 0}, + {0, 0, 2, 1, 5}, + {0, 0, 0, 12, 3}, + {0, 0, 0, 0, -1}, + }, + x: []float64{1, 2, 3, 4, 5}, + incX: 1, + ans: []float64{1, -0.166666666666667, -0.416666666666667, 0.437500000000000, -5.770833333333334}, + }, + { + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {1, 3, 5, 0, 0}, + {0, 6, 7, 5, 0}, + {0, 0, 2, 1, 5}, + {0, 0, 0, 12, 3}, + {0, 0, 0, 0, -1}, + }, + x: []float64{1, -101, 2, -201, 3, -301, 4, -401, 5, -501, -601, -701}, + incX: 2, + ans: []float64{1, -101, -0.166666666666667, -201, -0.416666666666667, -301, 0.437500000000000, -401, -5.770833333333334, -501, -601, -701}, + }, + { + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {1, 0, 0, 0, 0}, + {3, 6, 0, 0, 0}, + {5, 7, 2, 0, 0}, + {0, 5, 1, 12, 0}, + {0, 0, 5, 3, -1}, + }, + x: []float64{1, 2, 3, 4, 5}, + incX: 1, + ans: []float64{-15.854166666666664, -16.395833333333336, 13.208333333333334, 1.583333333333333, -5.000000000000000}, + }, + { + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + n: 5, + k: 2, + a: [][]float64{ + {1, 0, 0, 0, 0}, + {3, 6, 0, 0, 0}, + {5, 7, 2, 0, 0}, + {0, 5, 1, 12, 0}, + {0, 0, 5, 3, -1}, + }, + x: []float64{1, -101, 2, -201, 3, -301, 4, -401, 5, -501, -601, -701}, + incX: 2, + ans: []float64{-15.854166666666664, -101, -16.395833333333336, -201, 13.208333333333334, -301, 1.583333333333333, -401, -5.000000000000000, -501, -601, -701}, + }, + } { + var aFlat []float64 + if test.ul == blas.Upper { + aFlat = flattenBanded(test.a, test.k, 0) + } else { + aFlat = flattenBanded(test.a, 0, test.k) + } + xCopy := sliceCopy(test.x) + // TODO: Have tests where the banded matrix is constructed 
explicitly + // to allow testing for lda != k+1 + blasser.Dtbsv(test.ul, test.tA, test.d, test.n, test.k, aFlat, test.k+1, xCopy, test.incX) + if !dSliceTolEqual(test.ans, xCopy) { + t.Errorf("Case %v: Want %v, got %v", i, test.ans, xCopy) + } + } + + /* + // TODO: Uncomment when Dtrsv is fixed + // Compare with dense for larger matrices + for _, ul := range [...]blas.Uplo{blas.Upper, blas.Lower} { + for _, tA := range [...]blas.Transpose{blas.NoTrans, blas.Trans} { + for _, n := range [...]int{7, 8, 11} { + for _, d := range [...]blas.Diag{blas.NonUnit, blas.Unit} { + for _, k := range [...]int{0, 1, 3} { + for _, incX := range [...]int{1, 3} { + a := make([][]float64, n) + for i := range a { + a[i] = make([]float64, n) + for j := range a[i] { + a[i][j] = rand.Float64() + } + } + x := make([]float64, n) + for i := range x { + x[i] = rand.Float64() + } + extra := 3 + xinc := makeIncremented(x, incX, extra) + bandX := sliceCopy(xinc) + var aFlatBand []float64 + if ul == blas.Upper { + aFlatBand = flattenBanded(a, k, 0) + } else { + aFlatBand = flattenBanded(a, 0, k) + } + blasser.Dtbsv(ul, tA, d, n, k, aFlatBand, k+1, bandX, incX) + + aFlatDense := flatten(a) + denseX := sliceCopy(xinc) + blasser.Dtrsv(ul, tA, d, n, aFlatDense, n, denseX, incX) + if !dSliceTolEqual(denseX, bandX) { + t.Errorf("Case %v: dense banded mismatch") + } + } + } + } + } + } + } + */ +} diff --git a/blas/testblas/dtpmv.go b/blas/testblas/dtpmv.go new file mode 100644 index 00000000..a031f319 --- /dev/null +++ b/blas/testblas/dtpmv.go @@ -0,0 +1,129 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dtpmver interface { + Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) +} + +func DtpmvTest(t *testing.T, blasser Dtpmver) { + for i, test := range []struct { + n int + a [][]float64 + x []float64 + d blas.Diag + ul blas.Uplo + tA blas.Transpose + ans []float64 + }{ + { + n: 3, + a: 
[][]float64{ + {5, 6, 7}, + {0, 9, 10}, + {0, 0, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.NonUnit, + ul: blas.Upper, + tA: blas.NoTrans, + ans: []float64{74, 86, 65}, + }, + { + n: 3, + a: [][]float64{ + {5, 6, 7}, + {0, 9, 10}, + {0, 0, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.Unit, + ul: blas.Upper, + tA: blas.NoTrans, + ans: []float64{62, 54, 5}, + }, + { + n: 3, + a: [][]float64{ + {5, 0, 0}, + {6, 9, 0}, + {7, 10, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.NonUnit, + ul: blas.Lower, + tA: blas.NoTrans, + ans: []float64{15, 54, 126}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {6, 1, 0}, + {7, 10, 1}, + }, + x: []float64{3, 4, 5}, + d: blas.Unit, + ul: blas.Lower, + tA: blas.NoTrans, + ans: []float64{3, 22, 66}, + }, + { + n: 3, + a: [][]float64{ + {5, 6, 7}, + {0, 9, 10}, + {0, 0, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.NonUnit, + ul: blas.Upper, + tA: blas.Trans, + ans: []float64{15, 54, 126}, + }, + { + n: 3, + a: [][]float64{ + {1, 6, 7}, + {0, 1, 10}, + {0, 0, 1}, + }, + x: []float64{3, 4, 5}, + d: blas.Unit, + ul: blas.Upper, + tA: blas.Trans, + ans: []float64{3, 22, 66}, + }, + { + n: 3, + a: [][]float64{ + {5, 0, 0}, + {6, 9, 0}, + {7, 10, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.NonUnit, + ul: blas.Lower, + tA: blas.Trans, + ans: []float64{74, 86, 65}, + }, + } { + incTest := func(incX, extra int) { + aFlat := flattenTriangular(test.a, test.ul) + x := makeIncremented(test.x, incX, extra) + blasser.Dtpmv(test.ul, test.tA, test.d, test.n, aFlat, x, incX) + ans := makeIncremented(test.ans, incX, extra) + if !floats.EqualApprox(x, ans, 1e-14) { + t.Errorf("Case %v, idx %v: Want %v, got %v.", i, incX, ans, x) + } + } + incTest(1, 0) + incTest(-3, 3) + incTest(4, 3) + } +} diff --git a/blas/testblas/dtpsv.go b/blas/testblas/dtpsv.go new file mode 100644 index 00000000..d29ff965 --- /dev/null +++ b/blas/testblas/dtpsv.go @@ -0,0 +1,144 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" 
+) + +type Dtpsver interface { + Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) +} + +func DtpsvTest(t *testing.T, blasser Dtpsver) { + for i, test := range []struct { + n int + a [][]float64 + ul blas.Uplo + tA blas.Transpose + d blas.Diag + x []float64 + ans []float64 + }{ + { + n: 3, + a: [][]float64{ + {1, 2, 3}, + {0, 8, 15}, + {0, 0, 8}, + }, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + x: []float64{5, 6, 7}, + ans: []float64{4.15625, -0.890625, 0.875}, + }, + { + n: 3, + a: [][]float64{ + {1, 2, 3}, + {0, 1, 15}, + {0, 0, 1}, + }, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + x: []float64{5, 6, 7}, + ans: []float64{182, -99, 7}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {2, 8, 0}, + {3, 15, 8}, + }, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + x: []float64{5, 6, 7}, + ans: []float64{5, -0.5, -0.0625}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {2, 8, 0}, + {3, 15, 8}, + }, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + x: []float64{5, 6, 7}, + ans: []float64{5, -4, 52}, + }, + { + n: 3, + a: [][]float64{ + {1, 2, 3}, + {0, 8, 15}, + {0, 0, 8}, + }, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + x: []float64{5, 6, 7}, + ans: []float64{5, -0.5, -0.0625}, + }, + { + n: 3, + a: [][]float64{ + {1, 2, 3}, + {0, 8, 15}, + {0, 0, 8}, + }, + ul: blas.Upper, + tA: blas.Trans, + d: blas.Unit, + x: []float64{5, 6, 7}, + ans: []float64{5, -4, 52}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {2, 8, 0}, + {3, 15, 8}, + }, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + x: []float64{5, 6, 7}, + ans: []float64{4.15625, -0.890625, 0.875}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {2, 1, 0}, + {3, 15, 1}, + }, + ul: blas.Lower, + tA: blas.Trans, + d: blas.Unit, + x: []float64{5, 6, 7}, + ans: []float64{182, -99, 7}, + }, + } { + incTest := func(incX, extra int) { + aFlat := flattenTriangular(test.a, test.ul) + x := 
makeIncremented(test.x, incX, extra) + blasser.Dtpsv(test.ul, test.tA, test.d, test.n, aFlat, x, incX) + ans := makeIncremented(test.ans, incX, extra) + if !floats.EqualApprox(x, ans, 1e-14) { + t.Errorf("Case %v, incX = %v: Want %v, got %v.", i, incX, ans, x) + } + } + incTest(1, 0) + incTest(-2, 0) + incTest(3, 0) + incTest(-3, 8) + incTest(4, 2) + } +} diff --git a/blas/testblas/dtrmm.go b/blas/testblas/dtrmm.go new file mode 100644 index 00000000..070eca5a --- /dev/null +++ b/blas/testblas/dtrmm.go @@ -0,0 +1,806 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dtrmmer interface { + Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) +} + +func DtrmmTest(t *testing.T, blasser Dtrmmer) { + for i, test := range []struct { + s blas.Side + ul blas.Uplo + tA blas.Transpose + d blas.Diag + m int + n int + alpha float64 + a [][]float64 + b [][]float64 + ans [][]float64 + }{ + { + s: blas.Left, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3, 4}, + {0, 5, 6, 7}, + {0, 0, 8, 9}, + {0, 0, 0, 10}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {320, 340, 360}, + {588, 624, 660}, + {598, 632, 666}, + {380, 400, 420}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2}, + {0, 5}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {72, 78, 84}, + {130, 140, 150}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3, 4}, + {0, 5, 6, 7}, + {0, 0, 8, 9}, + {0, 0, 0, 10}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {320, 340, 360}, 
+ {484, 512, 540}, + {374, 394, 414}, + {38, 40, 42}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2}, + {0, 5}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {72, 78, 84}, + {26, 28, 30}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0, 0}, + {2, 5, 0, 0}, + {3, 6, 8, 0}, + {4, 7, 9, 10}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {20, 22, 24}, + {170, 184, 198}, + {472, 506, 540}, + {930, 990, 1050}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0}, + {2, 5}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {20, 22, 24}, + {170, 184, 198}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0, 0}, + {2, 5, 0, 0}, + {3, 6, 8, 0}, + {4, 7, 9, 10}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {20, 22, 24}, + {66, 72, 78}, + {248, 268, 288}, + {588, 630, 672}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0}, + {2, 5}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {20, 22, 24}, + {66, 72, 78}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3, 4}, + {0, 5, 6, 7}, + {0, 0, 8, 9}, + {0, 0, 0, 10}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {20, 22, 24}, + {170, 184, 198}, + {472, 506, 540}, + {930, 990, 1050}, + }, + }, 
+ { + s: blas.Left, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2}, + {0, 5}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {20, 22, 24}, + {170, 184, 198}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.Trans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3, 4}, + {0, 5, 6, 7}, + {0, 0, 8, 9}, + {0, 0, 0, 10}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {20, 22, 24}, + {66, 72, 78}, + {248, 268, 288}, + {588, 630, 672}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.Trans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2}, + {0, 5}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {20, 22, 24}, + {66, 72, 78}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0, 0}, + {2, 5, 0, 0}, + {3, 6, 8, 0}, + {4, 7, 9, 10}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {320, 340, 360}, + {588, 624, 660}, + {598, 632, 666}, + {380, 400, 420}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0}, + {2, 5}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {72, 78, 84}, + {130, 140, 150}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.Trans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0, 0}, + {2, 5, 0, 0}, + {3, 6, 8, 0}, + {4, 7, 9, 10}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {320, 340, 360}, + {484, 512, 540}, + {374, 394, 414}, + {38, 40, 42}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.Trans, + d: 
blas.Unit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0}, + {2, 5}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {72, 78, 84}, + {26, 28, 30}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {20, 128, 314}, + {26, 164, 398}, + {32, 200, 482}, + {38, 236, 566}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {20, 128, 314}, + {26, 164, 398}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {20, 62, 194}, + {26, 80, 248}, + {32, 98, 302}, + {38, 116, 356}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {20, 62, 194}, + {26, 80, 248}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {136, 208, 144}, + {172, 262, 180}, + {208, 316, 216}, + {244, 370, 252}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 
6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {136, 208, 144}, + {172, 262, 180}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {136, 142, 24}, + {172, 178, 30}, + {208, 214, 36}, + {244, 250, 42}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {136, 142, 24}, + {172, 178, 30}, + }, + }, + + { + s: blas.Right, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {136, 208, 144}, + {172, 262, 180}, + {208, 316, 216}, + {244, 370, 252}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {136, 208, 144}, + {172, 262, 180}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.Trans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {136, 142, 24}, + {172, 178, 30}, + {208, 214, 36}, + {244, 250, 42}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.Trans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: 
[][]float64{ + {136, 142, 24}, + {172, 178, 30}, + }, + }, + + { + s: blas.Right, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {20, 128, 314}, + {26, 164, 398}, + {32, 200, 482}, + {38, 236, 566}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {20, 128, 314}, + {26, 164, 398}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.Trans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {20, 62, 194}, + {26, 80, 248}, + {32, 98, 302}, + {38, 116, 356}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.Trans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 2, + a: [][]float64{ + {1, 0, 0}, + {2, 4, 0}, + {3, 5, 6}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {20, 62, 194}, + {26, 80, 248}, + }, + }, + } { + aFlat := flatten(test.a) + bFlat := flatten(test.b) + ansFlat := flatten(test.ans) + blasser.Dtrmm(test.s, test.ul, test.tA, test.d, test.m, test.n, test.alpha, aFlat, len(test.a[0]), bFlat, len(test.b[0])) + if !floats.EqualApprox(ansFlat, bFlat, 1e-14) { + t.Errorf("Case %v. 
Want %v, got %v.", i, ansFlat, bFlat) + } + } +} diff --git a/blas/testblas/dtrmv.go b/blas/testblas/dtrmv.go new file mode 100644 index 00000000..bdf82c60 --- /dev/null +++ b/blas/testblas/dtrmv.go @@ -0,0 +1,147 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dtrmver interface { + Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) +} + +func DtrmvTest(t *testing.T, blasser Dtrmver) { + for i, test := range []struct { + n int + a [][]float64 + x []float64 + d blas.Diag + ul blas.Uplo + tA blas.Transpose + ans []float64 + }{ + { + n: 1, + a: [][]float64{{5}}, + x: []float64{2}, + d: blas.NonUnit, + ul: blas.Upper, + tA: blas.NoTrans, + ans: []float64{10}, + }, + { + n: 1, + a: [][]float64{{5}}, + x: []float64{2}, + d: blas.Unit, + ul: blas.Upper, + tA: blas.NoTrans, + ans: []float64{2}, + }, + { + n: 3, + a: [][]float64{ + {5, 6, 7}, + {0, 9, 10}, + {0, 0, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.NonUnit, + ul: blas.Upper, + tA: blas.NoTrans, + ans: []float64{74, 86, 65}, + }, + { + n: 3, + a: [][]float64{ + {5, 6, 7}, + {0, 9, 10}, + {0, 0, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.Unit, + ul: blas.Upper, + tA: blas.NoTrans, + ans: []float64{62, 54, 5}, + }, + { + n: 3, + a: [][]float64{ + {5, 0, 0}, + {6, 9, 0}, + {7, 10, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.NonUnit, + ul: blas.Lower, + tA: blas.NoTrans, + ans: []float64{15, 54, 126}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {6, 1, 0}, + {7, 10, 1}, + }, + x: []float64{3, 4, 5}, + d: blas.Unit, + ul: blas.Lower, + tA: blas.NoTrans, + ans: []float64{3, 22, 66}, + }, + { + n: 3, + a: [][]float64{ + {5, 6, 7}, + {0, 9, 10}, + {0, 0, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.NonUnit, + ul: blas.Upper, + tA: blas.Trans, + ans: []float64{15, 54, 126}, + }, + { + n: 3, + a: [][]float64{ + {1, 6, 7}, + {0, 1, 10}, + {0, 0, 1}, + }, + x: []float64{3, 4, 5}, + d: blas.Unit, + ul: 
blas.Upper, + tA: blas.Trans, + ans: []float64{3, 22, 66}, + }, + { + n: 3, + a: [][]float64{ + {5, 0, 0}, + {6, 9, 0}, + {7, 10, 13}, + }, + x: []float64{3, 4, 5}, + d: blas.NonUnit, + ul: blas.Lower, + tA: blas.Trans, + ans: []float64{74, 86, 65}, + }, + } { + incTest := func(incX, extra int) { + aFlat := flatten(test.a) + x := makeIncremented(test.x, incX, extra) + blasser.Dtrmv(test.ul, test.tA, test.d, test.n, aFlat, test.n, x, incX) + ans := makeIncremented(test.ans, incX, extra) + if !floats.EqualApprox(x, ans, 1e-14) { + t.Errorf("Case %v, idx %v: Want %v, got %v.", i, incX, ans, x) + } + } + incTest(1, 3) + incTest(-3, 3) + incTest(4, 3) + } +} diff --git a/blas/testblas/dtrmvbench.go b/blas/testblas/dtrmvbench.go new file mode 100644 index 00000000..9f9f04c1 --- /dev/null +++ b/blas/testblas/dtrmvbench.go @@ -0,0 +1,30 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package testblas + +import ( + "math/rand" + "testing" + + "github.com/gonum/blas" +) + +func DtrmvBenchmark(b *testing.B, dtrmv Dtrmver, n, lda, incX int, ul blas.Uplo, tA blas.Transpose, d blas.Diag) { + rnd := rand.New(rand.NewSource(0)) + a := make([]float64, n*lda) + for i := range a { + a[i] = rnd.Float64() + } + + x := make([]float64, n*incX) + for i := range x { + x[i] = rnd.Float64() + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dtrmv.Dtrmv(ul, tA, d, n, a, lda, x, incX) + } +} diff --git a/blas/testblas/dtrsm.go b/blas/testblas/dtrsm.go new file mode 100644 index 00000000..f00b488e --- /dev/null +++ b/blas/testblas/dtrsm.go @@ -0,0 +1,811 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dtrsmer interface { + Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, + alpha float64, a []float64, lda int, b []float64, ldb int) +} + +func DtrsmTest(t *testing.T, blasser Dtrsmer) { + for i, test := range []struct { + s blas.Side + ul blas.Uplo + tA blas.Transpose + d blas.Diag + m int + n int + alpha float64 + a [][]float64 + b [][]float64 + ans [][]float64 + }{ + { + s: blas.Left, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 3, + n: 2, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 5}, + }, + b: [][]float64{ + {3, 6}, + {4, 7}, + {5, 8}, + }, + ans: [][]float64{ + {1, 3.4}, + {-0.5, -0.5}, + {2, 3.2}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + m: 3, + n: 2, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 5}, + }, + b: [][]float64{ + {3, 6}, + {4, 7}, + {5, 8}, + }, + ans: [][]float64{ + {60, 96}, + {-42, -66}, + {10, 16}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 3, + n: 4, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 5}, + }, + b: [][]float64{ + {3, 6, 2, 9}, + {4, 7, 1, 3}, + {5, 8, 9, 10}, + }, + ans: 
[][]float64{ + {1, 3.4, 1.2, 13}, + {-0.5, -0.5, -4, -3.5}, + {2, 3.2, 3.6, 4}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + m: 3, + n: 4, + alpha: 2, + a: [][]float64{ + {1, 2, 3}, + {0, 4, 5}, + {0, 0, 5}, + }, + b: [][]float64{ + {3, 6, 2, 9}, + {4, 7, 1, 3}, + {5, 8, 9, 10}, + }, + ans: [][]float64{ + {60, 96, 126, 146}, + {-42, -66, -88, -94}, + {10, 16, 18, 20}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 3, + n: 2, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 4, 0}, + {5, 6, 7}, + }, + b: [][]float64{ + {3, 6}, + {4, 7}, + {5, 8}, + }, + ans: [][]float64{ + {4.5, 9}, + {-0.375, -1.5}, + {-0.75, -12.0 / 7}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + m: 3, + n: 2, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 4, 0}, + {5, 6, 7}, + }, + b: [][]float64{ + {3, 6}, + {4, 7}, + {5, 8}, + }, + ans: [][]float64{ + {9, 18}, + {-15, -33}, + {60, 132}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 3, + n: 4, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 4, 0}, + {5, 6, 7}, + }, + b: [][]float64{ + {3, 6, 2, 9}, + {4, 7, 1, 3}, + {5, 8, 9, 10}, + }, + ans: [][]float64{ + {4.5, 9, 3, 13.5}, + {-0.375, -1.5, -1.5, -63.0 / 8}, + {-0.75, -12.0 / 7, 3, 39.0 / 28}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + m: 3, + n: 4, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 4, 0}, + {5, 6, 7}, + }, + b: [][]float64{ + {3, 6, 2, 9}, + {4, 7, 1, 3}, + {5, 8, 9, 10}, + }, + ans: [][]float64{ + {9, 18, 6, 27}, + {-15, -33, -15, -72}, + {60, 132, 87, 327}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + m: 3, + n: 2, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 7}, + }, + b: [][]float64{ + {3, 6}, + {4, 7}, + {5, 8}, + }, + ans: [][]float64{ + {4.5, 9}, + {-0.30, -1.2}, + {-6.0 / 35, -24.0 / 35}, + }, 
+ }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.Trans, + d: blas.Unit, + m: 3, + n: 2, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 7}, + }, + b: [][]float64{ + {3, 6}, + {4, 7}, + {5, 8}, + }, + ans: [][]float64{ + {9, 18}, + {-15, -33}, + {69, 150}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + m: 3, + n: 4, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 7}, + }, + b: [][]float64{ + {3, 6, 6, 7}, + {4, 7, 8, 9}, + {5, 8, 10, 11}, + }, + ans: [][]float64{ + {4.5, 9, 9, 10.5}, + {-0.3, -1.2, -0.6, -0.9}, + {-6.0 / 35, -24.0 / 35, -12.0 / 35, -18.0 / 35}, + }, + }, + { + s: blas.Left, + ul: blas.Upper, + tA: blas.Trans, + d: blas.Unit, + m: 3, + n: 4, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 7}, + }, + b: [][]float64{ + {3, 6, 6, 7}, + {4, 7, 8, 9}, + {5, 8, 10, 11}, + }, + ans: [][]float64{ + {9, 18, 18, 21}, + {-15, -33, -30, -36}, + {69, 150, 138, 165}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + m: 3, + n: 2, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 4, 0}, + {5, 6, 8}, + }, + b: [][]float64{ + {3, 6}, + {4, 7}, + {5, 8}, + }, + ans: [][]float64{ + {-0.46875, 0.375}, + {0.1875, 0.75}, + {1.875, 3}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.Trans, + d: blas.Unit, + m: 3, + n: 2, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 4, 0}, + {5, 6, 8}, + }, + b: [][]float64{ + {3, 6}, + {4, 7}, + {5, 8}, + }, + ans: [][]float64{ + {168, 267}, + {-78, -123}, + {15, 24}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + m: 3, + n: 4, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 4, 0}, + {5, 6, 8}, + }, + b: [][]float64{ + {3, 6, 2, 3}, + {4, 7, 4, 5}, + {5, 8, 6, 7}, + }, + ans: [][]float64{ + {-0.46875, 0.375, -2.0625, -1.78125}, + {0.1875, 0.75, -0.375, -0.1875}, + {1.875, 3, 2.25, 2.625}, + }, + }, + { + s: blas.Left, + ul: blas.Lower, + tA: blas.Trans, + 
d: blas.Unit, + m: 3, + n: 4, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 4, 0}, + {5, 6, 8}, + }, + b: [][]float64{ + {3, 6, 2, 3}, + {4, 7, 4, 5}, + {5, 8, 6, 7}, + }, + ans: [][]float64{ + {168, 267, 204, 237}, + {-78, -123, -96, -111}, + {15, 24, 18, 21}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 7}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {15, -2.4, -48.0 / 35}, + {19.5, -3.3, -66.0 / 35}, + {24, -4.2, -2.4}, + {28.5, -5.1, -102.0 / 35}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {30, -57, 258}, + {39, -75, 339}, + {48, -93, 420}, + {57, -111, 501}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 7}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {15, -2.4, -48.0 / 35}, + {19.5, -3.3, -66.0 / 35}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {30, -57, 258}, + {39, -75, 339}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 5, 0}, + {4, 6, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {4.2, 1.2, 4.5}, + {5.775, 1.65, 5.625}, + {7.35, 2.1, 6.75}, + {8.925, 2.55, 7.875}, + }, + }, + { + s: 
blas.Right, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 5, 0}, + {4, 6, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {435, -183, 36}, + {543, -228, 45}, + {651, -273, 54}, + {759, -318, 63}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 5, 0}, + {4, 6, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {4.2, 1.2, 4.5}, + {5.775, 1.65, 5.625}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 5, 0}, + {4, 6, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {435, -183, 36}, + {543, -228, 45}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {4.2, 1.2, 4.5}, + {5.775, 1.65, 5.625}, + {7.35, 2.1, 6.75}, + {8.925, 2.55, 7.875}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.Trans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {435, -183, 36}, + {543, -228, 45}, + {651, -273, 54}, + {759, -318, 63}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {4.2, 1.2, 4.5}, + {5.775, 1.65, 5.625}, + }, + }, + { + s: blas.Right, + ul: blas.Upper, + tA: 
blas.Trans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 3, 4}, + {0, 5, 6}, + {0, 0, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {435, -183, 36}, + {543, -228, 45}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + m: 4, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 5, 0}, + {4, 6, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {15, -2.4, -1.2}, + {19.5, -3.3, -1.65}, + {24, -4.2, -2.1}, + {28.5, -5.1, -2.55}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.Trans, + d: blas.Unit, + m: 4, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 5, 0}, + {4, 6, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + {16, 17, 18}, + {19, 20, 21}, + }, + ans: [][]float64{ + {30, -57, 258}, + {39, -75, 339}, + {48, -93, 420}, + {57, -111, 501}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + m: 2, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 5, 0}, + {4, 6, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {15, -2.4, -1.2}, + {19.5, -3.3, -1.65}, + }, + }, + { + s: blas.Right, + ul: blas.Lower, + tA: blas.Trans, + d: blas.Unit, + m: 2, + n: 3, + alpha: 3, + a: [][]float64{ + {2, 0, 0}, + {3, 5, 0}, + {4, 6, 8}, + }, + b: [][]float64{ + {10, 11, 12}, + {13, 14, 15}, + }, + ans: [][]float64{ + {30, -57, 258}, + {39, -75, 339}, + }, + }, + } { + aFlat := flatten(test.a) + bFlat := flatten(test.b) + ansFlat := flatten(test.ans) + var lda int + if test.s == blas.Left { + lda = test.m + } else { + lda = test.n + } + blasser.Dtrsm(test.s, test.ul, test.tA, test.d, test.m, test.n, test.alpha, aFlat, lda, bFlat, test.n) + if !floats.EqualApprox(ansFlat, bFlat, 1e-13) { + t.Errorf("Case %v: Want %v, got %v.", i, ansFlat, bFlat) + } + } +} diff --git a/blas/testblas/dtrsv.go 
b/blas/testblas/dtrsv.go new file mode 100644 index 00000000..003b5b70 --- /dev/null +++ b/blas/testblas/dtrsv.go @@ -0,0 +1,144 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" + "github.com/gonum/floats" +) + +type Dtrsver interface { + Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) +} + +func DtrsvTest(t *testing.T, blasser Dtrsver) { + for i, test := range []struct { + n int + a [][]float64 + ul blas.Uplo + tA blas.Transpose + d blas.Diag + x []float64 + ans []float64 + }{ + { + n: 3, + a: [][]float64{ + {1, 2, 3}, + {0, 8, 15}, + {0, 0, 8}, + }, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.NonUnit, + x: []float64{5, 6, 7}, + ans: []float64{4.15625, -0.890625, 0.875}, + }, + { + n: 3, + a: [][]float64{ + {1, 2, 3}, + {0, 1, 15}, + {0, 0, 1}, + }, + ul: blas.Upper, + tA: blas.NoTrans, + d: blas.Unit, + x: []float64{5, 6, 7}, + ans: []float64{182, -99, 7}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {2, 8, 0}, + {3, 15, 8}, + }, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.NonUnit, + x: []float64{5, 6, 7}, + ans: []float64{5, -0.5, -0.0625}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {2, 8, 0}, + {3, 15, 8}, + }, + ul: blas.Lower, + tA: blas.NoTrans, + d: blas.Unit, + x: []float64{5, 6, 7}, + ans: []float64{5, -4, 52}, + }, + { + n: 3, + a: [][]float64{ + {1, 2, 3}, + {0, 8, 15}, + {0, 0, 8}, + }, + ul: blas.Upper, + tA: blas.Trans, + d: blas.NonUnit, + x: []float64{5, 6, 7}, + ans: []float64{5, -0.5, -0.0625}, + }, + { + n: 3, + a: [][]float64{ + {1, 2, 3}, + {0, 8, 15}, + {0, 0, 8}, + }, + ul: blas.Upper, + tA: blas.Trans, + d: blas.Unit, + x: []float64{5, 6, 7}, + ans: []float64{5, -4, 52}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {2, 8, 0}, + {3, 15, 8}, + }, + ul: blas.Lower, + tA: blas.Trans, + d: blas.NonUnit, + x: []float64{5, 6, 7}, + ans: []float64{4.15625, -0.890625, 0.875}, + }, + { + n: 3, + a: [][]float64{ + {1, 0, 0}, + {2, 1, 0}, + {3, 15, 
1}, + }, + ul: blas.Lower, + tA: blas.Trans, + d: blas.Unit, + x: []float64{5, 6, 7}, + ans: []float64{182, -99, 7}, + }, + } { + incTest := func(incX, extra int) { + aFlat := flatten(test.a) + x := makeIncremented(test.x, incX, extra) + blasser.Dtrsv(test.ul, test.tA, test.d, test.n, aFlat, test.n, x, incX) + ans := makeIncremented(test.ans, incX, extra) + if !floats.EqualApprox(x, ans, 1e-14) { + t.Errorf("Case %v, incX = %v: Want %v, got %v.", i, incX, ans, x) + } + } + incTest(1, 0) + incTest(-2, 0) + incTest(3, 0) + incTest(-3, 8) + incTest(4, 2) + } +} diff --git a/blas/testblas/dtxmv.go b/blas/testblas/dtxmv.go new file mode 100644 index 00000000..eecc10e9 --- /dev/null +++ b/blas/testblas/dtxmv.go @@ -0,0 +1,145 @@ +package testblas + +import ( + "testing" + + "github.com/gonum/blas" +) + +type Dtxmver interface { + Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) + Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) + Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, x []float64, incX int) +} + +type vec struct { + data []float64 + inc int +} + +var cases = []struct { + n, k int + ul blas.Uplo + d blas.Diag + ldab int + tr, tb, tp []float64 + ins []vec + solNoTrans []float64 + solTrans []float64 +}{ + { + n: 3, + k: 1, + ul: blas.Upper, + d: blas.NonUnit, + tr: []float64{1, 2, 0, 0, 3, 4, 0, 0, 5}, + tb: []float64{1, 2, 3, 4, 5, 0}, + ldab: 2, + tp: []float64{1, 2, 0, 3, 4, 5}, + ins: []vec{ + {[]float64{2, 3, 4}, 1}, + {[]float64{2, 1, 3, 1, 4}, 2}, + {[]float64{4, 1, 3, 1, 2}, -2}, + }, + solNoTrans: []float64{8, 25, 20}, + solTrans: []float64{2, 13, 32}, + }, + { + n: 3, + k: 1, + ul: blas.Upper, + d: blas.Unit, + tr: []float64{1, 2, 0, 0, 3, 4, 0, 0, 5}, + tb: []float64{1, 2, 3, 4, 5, 0}, + ldab: 2, + tp: []float64{1, 2, 0, 3, 4, 5}, + ins: []vec{ + {[]float64{2, 3, 4}, 1}, + {[]float64{2, 1, 3, 1, 4}, 2}, + {[]float64{4, 
1, 3, 1, 2}, -2}, + }, + solNoTrans: []float64{8, 19, 4}, + solTrans: []float64{2, 7, 16}, + }, + { + n: 3, + k: 1, + ul: blas.Lower, + d: blas.NonUnit, + tr: []float64{1, 0, 0, 2, 3, 0, 0, 4, 5}, + tb: []float64{0, 1, 2, 3, 4, 5}, + ldab: 2, + tp: []float64{1, 2, 3, 0, 4, 5}, + ins: []vec{ + {[]float64{2, 3, 4}, 1}, + {[]float64{2, 1, 3, 1, 4}, 2}, + {[]float64{4, 1, 3, 1, 2}, -2}, + }, + solNoTrans: []float64{2, 13, 32}, + solTrans: []float64{8, 25, 20}, + }, + { + n: 3, + k: 1, + ul: blas.Lower, + d: blas.Unit, + tr: []float64{1, 0, 0, 2, 3, 0, 0, 4, 5}, + tb: []float64{0, 1, 2, 3, 4, 5}, + ldab: 2, + tp: []float64{1, 2, 3, 0, 4, 5}, + ins: []vec{ + {[]float64{2, 3, 4}, 1}, + {[]float64{2, 1, 3, 1, 4}, 2}, + {[]float64{4, 1, 3, 1, 2}, -2}, + }, + solNoTrans: []float64{2, 7, 16}, + solTrans: []float64{8, 19, 4}, + }, +} + +func DtxmvTest(t *testing.T, blasser Dtxmver) { + + for nc, c := range cases { + for nx, x := range c.ins { + in := make([]float64, len(x.data)) + copy(in, x.data) + blasser.Dtrmv(c.ul, blas.NoTrans, c.d, c.n, c.tr, c.n, in, x.inc) + if !dStridedSliceTolEqual(c.n, in, x.inc, c.solNoTrans, 1) { + t.Error("Wrong Dtrmv result for: NoTrans in Case:", nc, "input:", nx) + } + + in = make([]float64, len(x.data)) + copy(in, x.data) + blasser.Dtrmv(c.ul, blas.Trans, c.d, c.n, c.tr, c.n, in, x.inc) + if !dStridedSliceTolEqual(c.n, in, x.inc, c.solTrans, 1) { + t.Error("Wrong Dtrmv result for: Trans in Case:", nc, "input:", nx) + } + in = make([]float64, len(x.data)) + copy(in, x.data) + blasser.Dtbmv(c.ul, blas.NoTrans, c.d, c.n, c.k, c.tb, c.ldab, in, x.inc) + if !dStridedSliceTolEqual(c.n, in, x.inc, c.solNoTrans, 1) { + t.Error("Wrong Dtbmv result for: NoTrans in Case:", nc, "input:", nx) + } + + in = make([]float64, len(x.data)) + copy(in, x.data) + blasser.Dtbmv(c.ul, blas.Trans, c.d, c.n, c.k, c.tb, c.ldab, in, x.inc) + if !dStridedSliceTolEqual(c.n, in, x.inc, c.solTrans, 1) { + t.Error("Wrong Dtbmv result for: Trans in Case:", nc, "input:", nx) + 
} + in = make([]float64, len(x.data)) + copy(in, x.data) + blasser.Dtpmv(c.ul, blas.NoTrans, c.d, c.n, c.tp, in, x.inc) + if !dStridedSliceTolEqual(c.n, in, x.inc, c.solNoTrans, 1) { + t.Error("Wrong Dtpmv result for: NoTrans in Case:", nc, "input:", nx) + } + + in = make([]float64, len(x.data)) + copy(in, x.data) + blasser.Dtpmv(c.ul, blas.Trans, c.d, c.n, c.tp, in, x.inc) + if !dStridedSliceTolEqual(c.n, in, x.inc, c.solTrans, 1) { + t.Error("Wrong Dtpmv result for: Trans in Case:", nc, "input:", nx) + } + } + } +} diff --git a/blas/testblas/level1double.go b/blas/testblas/level1double.go new file mode 100644 index 00000000..9e160591 --- /dev/null +++ b/blas/testblas/level1double.go @@ -0,0 +1,2049 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package testblas provides tests for blas implementations. +package testblas + +import ( + "fmt" + "log" + + "github.com/gonum/blas" + "github.com/gonum/floats" + + "math" + "testing" +) + +type DoubleOneVectorCase struct { + Name string + X []float64 + Incx int + N int + Panic bool + Dasum float64 + Dnrm2 float64 + Idamax int + DscalCases []DScalCase +} + +type DScalCase struct { + Alpha float64 + Ans []float64 + Name string +} + +var DoubleOneVectorCases = []DoubleOneVectorCase{ + { + Name: "AllPositive", + X: []float64{6, 5, 4, 2, 6}, + Incx: 1, + N: 5, + Panic: false, + Dasum: 23, + Dnrm2: 10.81665382639196787935766380241148783875388972153573863813135, + Idamax: 0, + DscalCases: []DScalCase{ + { + Alpha: 0, + Ans: []float64{0, 0, 0, 0, 0}, + }, + { + Alpha: 1, + Ans: []float64{6, 5, 4, 2, 6}, + }, + { + Alpha: -2, + Ans: []float64{-12, -10, -8, -4, -12}, + }, + }, + }, + { + Name: "LeadingZero", + X: []float64{0, 1}, + Incx: 1, + N: 2, + Panic: false, + Dasum: 1, + Dnrm2: 1, + Idamax: 1, + DscalCases: []DScalCase{ + { + Alpha: 0, + Ans: []float64{0, 0}, + }, + { + Alpha: 1, + Ans: []float64{0, 
1}, + }, + { + Alpha: -2, + Ans: []float64{0, -2}, + }, + }, + }, + { + Name: "MaxInMiddle", + X: []float64{6, 5, 9, 0, 6}, + Incx: 1, + N: 5, + Panic: false, + Dasum: 26, + Dnrm2: 13.34166406412633371248943627250846646911846482744007727141318, + Idamax: 2, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-12, -10, -18, 0, -12}, + }, + }, + }, + { + Name: "MaxAtEnd", + X: []float64{6, 5, -9, 0, 10}, + Incx: 1, + N: 5, + Panic: false, + Dasum: 30, + Dnrm2: 15.55634918610404553681857596630667886426639062914642880494347, + Idamax: 4, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-12, -10, 18, 0, -20}, + }, + }, + }, + { + Name: "AllNegative", + X: []float64{-6, -5, -4, -2, -6}, + Incx: 1, + N: 5, + Panic: false, + Dasum: 23, + Dnrm2: 10.81665382639196787935766380241148783875388972153573863813135, + Idamax: 0, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{12, 10, 8, 4, 12}, + }, + }, + }, + { + Name: "AllMixed", + X: []float64{-6, 5, 4, -2, -6}, + Incx: 1, + N: 5, + Panic: false, + Dasum: 23, + Dnrm2: 10.81665382639196787935766380241148783875388972153573863813135, + Idamax: 0, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{12, -10, -8, 4, 12}, + }, + }, + }, + { + Name: "ZeroN", + X: []float64{-6, 5, 4, -2, -6}, + Incx: 1, + N: 0, + Panic: false, + Dasum: 0, + Dnrm2: 0, + Idamax: -1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-6, 5, 4, -2, -6}, + }, + }, + }, + { + Name: "OneN", + X: []float64{-6, 5, 4, -2, -6}, + Incx: 1, + N: 1, + Panic: false, + Dasum: 6, + Dnrm2: 6, + Idamax: 0, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{12, 5, 4, -2, -6}, + }, + }, + }, + { + Name: "PositiveExactInc", + X: []float64{-6, 5, 10, -2, -5}, + Incx: 2, + N: 3, + Panic: false, + Dasum: 21, + Dnrm2: 12.68857754044952038019377274608948979173952662752515253090272, + Idamax: 1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{12, 5, -20, -2, 10}, + }, + }, + }, + { + Name: 
"PositiveOffInc", + X: []float64{-6, 5, 4, -2, -6, 8, 10, 11}, + Incx: 3, + N: 3, + Panic: false, + Dasum: 18, + Dnrm2: 11.83215956619923208513465658312323409683100246158868064575943, + Idamax: 2, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{12, 5, 4, 4, -6, 8, -20, 11}, + }, + }, + }, + { + Name: "PositiveShortInc", + X: []float64{-6, 5, 4, -2, -6, 8, 10, 11}, + Incx: 3, + N: 2, + Panic: false, + Dasum: 8, + Dnrm2: 6.324555320336758663997787088865437067439110278650433653715009, + Idamax: 0, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{12, 5, 4, 4, -6, 8, 10, 11}, + }, + }, + }, + { + Name: "NegativeInc", + X: []float64{-6, 5, 4, -2, -6}, + Incx: -1, + N: 5, + Panic: false, + Dasum: 0, + Dnrm2: 0, + Idamax: -1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-6, 5, 4, -2, -6}, + }, + }, + }, + { + Name: "NegativeExactInc", + X: []float64{-6, 5, 4, -2, -6}, + Incx: -2, + N: 3, + Panic: false, + Dasum: 0, + Dnrm2: 0, + Idamax: -1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-6, 5, 4, -2, -6}, + }, + }, + }, + { + Name: "NegativeOffInc", + X: []float64{-6, 5, 4, -2, -6, 8, 10, 11}, + Incx: -3, + N: 2, + Panic: false, + Dasum: 0, + Dnrm2: 0, + Idamax: -1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-6, 5, 4, -2, -6, 8, 10, 11}, + }, + }, + }, + { + Name: "NegativeShortInc", + X: []float64{-6, 5, 4, -2, -6, 8, 10, 11}, + Incx: -3, + N: 2, + Panic: false, + Dasum: 0, + Dnrm2: 0, + Idamax: -1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-6, 5, 4, -2, -6, 8, 10, 11}, + }, + }, + }, + { + Name: "NegativeN", + X: []float64{-6, 5, 4, -2, -6}, + Incx: 2, + N: -5, + Panic: true, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-6, 5, 4, -2, -6}, + }, + }, + }, + { + Name: "ZeroInc", + X: []float64{-6, 5, 4, -2, -6}, + Incx: 0, + N: 5, + Panic: true, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-6, 5, 4, -2, -6}, + }, + }, + }, + { + Name: "OutOfBounds", + 
X: []float64{-6, 5, 4, -2, -6}, + Incx: 2, + N: 6, + Panic: true, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-6, 5, 4, -2, -6}, + }, + }, + }, + { + Name: "NegativeOutOfBounds", + X: []float64{-6, 5, 4, -2, -6}, + Incx: -2, + N: 6, + Panic: false, + Dasum: 0, + Dnrm2: 0, + Idamax: -1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-6, 5, 4, -2, -6}, + }, + }, + }, + { + Name: "NaN", + X: []float64{math.NaN(), 2.0}, + Incx: 1, + N: 2, + Panic: false, + Dasum: math.NaN(), + Dnrm2: math.NaN(), + Idamax: 0, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{math.NaN(), -4.0}, + }, + { + Alpha: 0, + Ans: []float64{0, 0}, + }, + }, + }, + { + Name: "NaNInc", + X: []float64{math.NaN(), math.NaN(), 2.0}, + Incx: 2, + N: 2, + Panic: false, + Dasum: math.NaN(), + Dnrm2: math.NaN(), + Idamax: 0, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{math.NaN(), math.NaN(), -4.0}, + }, + { + Alpha: 0, + Ans: []float64{0, math.NaN(), 0}, + }, + }, + }, + { + Name: "Empty", + X: []float64{}, + Incx: 1, + N: 0, + Panic: false, + Dasum: 0, + Dnrm2: 0, + Idamax: -1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{}, + }, + { + Alpha: 0, + Ans: []float64{}, + }, + }, + }, + { + Name: "EmptyZeroInc", + X: []float64{}, + Incx: 0, + N: 0, + Panic: true, + Dasum: 0, + Dnrm2: 0, + Idamax: -1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{}, + }, + { + Alpha: 0, + Ans: []float64{}, + }, + }, + }, + { + Name: "EmptyReverse", + X: []float64{}, + Incx: -1, + N: 0, + Panic: false, + Dasum: 0, + Dnrm2: 0, + Idamax: -1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{}, + }, + { + Alpha: 0, + Ans: []float64{}, + }, + }, + }, + { + Name: "MultiInf", + X: []float64{5, math.Inf(1), math.Inf(-1), 8, 9}, + Incx: 1, + N: 5, + Panic: false, + Dasum: math.Inf(1), + Dnrm2: math.Inf(1), + Idamax: 1, + DscalCases: []DScalCase{ + { + Alpha: -2, + Ans: []float64{-10, math.Inf(-1), math.Inf(1), -16, -18}, + }, + { + 
// DoubleTwoVectorCase is the element type of the DoubleTwoVectorCases table.
// Each value holds two input vectors with their increments and element
// count, plus the expected results for the two-vector routines named by the
// Daxpy/Ddot/Dswap/Dcopy/Drot/Drotm fields.
type DoubleTwoVectorCase struct {
	// Name labels the case.
	Name string
	// X and Y are the two input vectors.
	X []float64
	Y []float64
	// XTmp and YTmp are zero-filled slices with the same lengths as X and Y
	// in the table entries.
	// NOTE(review): presumably scratch space for the test drivers — confirm.
	XTmp []float64
	YTmp []float64
	// Incx and Incy are the increments associated with X and Y.
	Incx int
	Incy int
	// N is the element count associated with the case.
	N int
	// Panic is set on table entries with invalid arguments (e.g. negative N).
	// NOTE(review): presumably means the routine is expected to panic — confirm
	// against the test drivers.
	Panic bool
	// For Daxpy: one expected result per Alpha value.
	DaxpyCases []DaxpyCase
	// DdotAns is the expected dot product for the case.
	DdotAns float64
	// DswapAns and DcopyAns hold the expected X and Y contents associated
	// with Dswap and Dcopy.
	DswapAns DTwoVecAnswer
	DcopyAns DTwoVecAnswer
	// DrotCases and DrotmCases hold expected results for individual rotation
	// parameter sets.
	DrotCases  []DrotCase
	DrotmCases []DrotmCase
}

// DaxpyCase pairs a scalar Alpha with Ans, the expected vector for that
// Alpha. In the table entries Ans has the length of Y.
type DaxpyCase struct {
	Alpha float64
	Ans   []float64
}

// DrotCase pairs rotation coefficients C and S with the expected X and Y
// vectors. The table entries set C and S to the cosine and sine of one angle.
type DrotCase struct {
	C    float64
	S    float64
	XAns []float64
	YAns []float64
}

// DrotmCase pairs a blas.DrotmParams value with the expected X and Y vectors.
// Name is an optional label; some table entries leave it empty.
type DrotmCase struct {
	P    blas.DrotmParams
	XAns []float64
	YAns []float64
	Name string
}

// DTwoVecAnswer holds expected contents for a pair of vectors X and Y.
type DTwoVecAnswer struct {
	X []float64
	Y []float64
}
[]float64{10, 15, -6, 3, 14, 7}, + }, + DcopyAns: DTwoVecAnswer{ + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{10, 15, -6, 3, 14, 7}, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(0), + S: math.Sin(0), + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3}, + }, + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{12.444023964292095, 12.749380282068351, -3.7473736752571014, 5.677251193294846, 15.224018588957296, 5.076299724034451}, + YAns: []float64{3.024279678886205, -8.151889500183792, 6.160940718590796, 5.076299724034451, -0.4788089421498931, -5.677251193294846}, + }, + { + C: math.Cos(0.5 * math.Pi), + S: math.Sin(0.5 * math.Pi), + XAns: []float64{8, -2, 4, 7, 6, -3}, + YAns: []float64{-10, -15, 6, -3, -14, -7}, + }, + { + C: math.Cos(math.Pi), + S: math.Sin(math.Pi), + XAns: []float64{-10, -15, 6, -3, -14, -7}, + YAns: []float64{-8, 2, -4, -7, -6, 3}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Identity, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3}, + Name: "Neg2Flag", + }, + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{8.2, 13.7, -5.8, 2, 12, 6.6}, + YAns: []float64{5, 0.5, 1.4, 3.8, 4.4, -0.8}, + Name: "Neg1Flag", + }, + { + P: blas.DrotmParams{ + Flag: blas.OffDiagonal, + H: [4]float64{1, 0.1, -0.1, 1}, + }, + XAns: []float64{9.2, 15.2, -6.4, 2.3, 13.4, 7.3}, + YAns: []float64{9, -0.5, 3.4, 7.3, 7.4, -2.3}, + Name: "ZeroFlag", + }, + { + P: blas.DrotmParams{ + Flag: blas.Diagonal, + H: [4]float64{0.5, -1, 1, 0.7}, + }, + XAns: []float64{13, 5.5, 1, 8.5, 13, 0.5}, + YAns: []float64{-4.4, -16.4, 8.8, 1.9, -9.8, -9.1}, + Name: "OneFlag", + }, + }, + }, + { + Name: "UnitaryIncLong", + X: []float64{10, 15, -6, 3, 14, 7, 8, -9, 10}, + Y: []float64{8, -2, 4, 7, 6, -3, 7, -6}, + XTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+ YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: 1, + Incy: 1, + N: 6, + Panic: false, + DaxpyCases: []DaxpyCase{ + { + Alpha: 1, + Ans: []float64{18, 13, -2, 10, 20, 4, 7, -6}, + }, + { + Alpha: 2, + Ans: []float64{28, 28, -8, 13, 34, 11, 7, -6}, + }, + { + Alpha: -3, + Ans: []float64{-22, -47, 22, -2, -36, -24, 7, -6}, + }, + { + Alpha: 0, + Ans: []float64{8, -2, 4, 7, 6, -3, 7, -6}, + }, + }, + DdotAns: 110, + DswapAns: DTwoVecAnswer{ + X: []float64{8, -2, 4, 7, 6, -3, 8, -9, 10}, + Y: []float64{10, 15, -6, 3, 14, 7, 7, -6}, + }, + DcopyAns: DTwoVecAnswer{ + X: []float64{10, 15, -6, 3, 14, 7, 8, -9, 10}, + Y: []float64{10, 15, -6, 3, 14, 7, 7, -6}, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(0), + S: math.Sin(0), + XAns: []float64{10, 15, -6, 3, 14, 7, 8, -9, 10}, + YAns: []float64{8, -2, 4, 7, 6, -3, 7, -6}, + }, + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{12.444023964292095, 12.749380282068351, -3.7473736752571014, 5.677251193294846, 15.224018588957296, 5.076299724034451, 8, -9, 10}, + YAns: []float64{3.024279678886205, -8.151889500183792, 6.160940718590796, 5.076299724034451, -0.4788089421498931, -5.677251193294846, 7, -6}, + }, + { + C: math.Cos(0.5 * math.Pi), + S: math.Sin(0.5 * math.Pi), + XAns: []float64{8, -2, 4, 7, 6, -3, 8, -9, 10}, + YAns: []float64{-10, -15, 6, -3, -14, -7, 7, -6}, + }, + { + C: math.Cos(math.Pi), + S: math.Sin(math.Pi), + XAns: []float64{-10, -15, 6, -3, -14, -7, 8, -9, 10}, + YAns: []float64{-8, 2, -4, -7, -6, 3, 7, -6}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Identity, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{10, 15, -6, 3, 14, 7, 8, -9, 10}, + YAns: []float64{8, -2, 4, 7, 6, -3, 7, -6}, + Name: "Neg2Flag", + }, + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{8.2, 13.7, -5.8, 2, 12, 6.6, 8, -9, 10}, + YAns: []float64{5, 0.5, 1.4, 3.8, 4.4, -0.8, 7, -6}, + 
Name: "Neg1Flag", + }, + { + P: blas.DrotmParams{ + Flag: blas.OffDiagonal, + H: [4]float64{1, 0.1, -0.1, 1}, + }, + XAns: []float64{9.2, 15.2, -6.4, 2.3, 13.4, 7.3, 8, -9, 10}, + YAns: []float64{9, -0.5, 3.4, 7.3, 7.4, -2.3, 7, -6}, + Name: "ZeroFlag", + }, + { + P: blas.DrotmParams{ + Flag: blas.Diagonal, + H: [4]float64{0.5, -1, 1, 0.7}, + }, + XAns: []float64{13, 5.5, 1, 8.5, 13, 0.5, 8, -9, 10}, + YAns: []float64{-4.4, -16.4, 8.8, 1.9, -9.8, -9.1, 7, -6}, + Name: "OneFlag", + }, + }, + }, + { + Name: "PositiveInc", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: 2, + Incy: 3, + N: 3, + Panic: false, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{28, -2, 4, -5, 6, -3, 24, 10}, + }, + }, + DdotAns: -18, + DswapAns: DTwoVecAnswer{ + X: []float64{8, 15, 7, 3, -4, 7}, + Y: []float64{10, -2, 4, -6, 6, -3, 14, 10}, + }, + DcopyAns: DTwoVecAnswer{ + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{10, -2, 4, -6, 6, -3, 14, 10}, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{12.444023964292095, 15, -2.479518890035003, 3, 10.997835971550302, 7}, + YAns: []float64{3.024279678886205, -2, 4, 8.879864079700745, 6, -3, -9.541886812516392, 10}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{8.2, 15, -6.1, 3, 13, 7}, + YAns: []float64{5, -2, 4, 2.9, 6, -3, -0.6, 10}, + }, + { + P: blas.DrotmParams{ + Flag: blas.OffDiagonal, + H: [4]float64{1, 0.1, -0.1, 1}, + }, + XAns: []float64{9.2, 15, -6.7, 3, 14.4, 7}, + YAns: []float64{9, -2, 4, 6.4, 6, -3, -2.6, 10}, + }, + { + P: blas.DrotmParams{ + Flag: blas.Diagonal, + H: [4]float64{0.5, -1, 1, 0.7}, + }, + XAns: []float64{13, 15, 4, 3, 3, 7}, + YAns: []float64{-4.4, -2, 4, 10.9, 6, -3, -16.8, 10}, + }, + }, + }, + { + Name: 
"NegativeInc", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: -2, + Incy: -3, + N: 3, + Panic: false, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{28, -2, 4, -5, 6, -3, 24, 10}, + }, + }, + DdotAns: -18, + DswapAns: DTwoVecAnswer{ + X: []float64{8, 15, 7, 3, -4, 7}, + Y: []float64{10, -2, 4, -6, 6, -3, 14, 10}, + }, + DcopyAns: DTwoVecAnswer{ + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{10, -2, 4, -6, 6, -3, 14, 10}, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{12.444023964292095, 15, -2.479518890035003, 3, 10.997835971550302, 7}, + YAns: []float64{3.024279678886205, -2, 4, 8.879864079700745, 6, -3, -9.541886812516392, 10}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{8.2, 15, -6.1, 3, 13, 7}, + YAns: []float64{5, -2, 4, 2.9, 6, -3, -0.6, 10}, + }, + { + P: blas.DrotmParams{ + Flag: blas.OffDiagonal, + H: [4]float64{1, 0.1, -0.1, 1}, + }, + XAns: []float64{9.2, 15, -6.7, 3, 14.4, 7}, + YAns: []float64{9, -2, 4, 6.4, 6, -3, -2.6, 10}, + }, + { + P: blas.DrotmParams{ + Flag: blas.Diagonal, + H: [4]float64{0.5, -1, 1, 0.7}, + }, + XAns: []float64{13, 15, 4, 3, 3, 7}, + YAns: []float64{-4.4, -2, 4, 10.9, 6, -3, -16.8, 10}, + }, + }, + }, + { + Name: "MixedInc1", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: 2, + Incy: -3, + N: 3, + Panic: false, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{36, -2, 4, -5, 6, -3, 16, 10}, + }, + }, + DdotAns: 30, + DswapAns: DTwoVecAnswer{ + X: []float64{-4, 15, 7, 3, 8, 7}, + Y: []float64{14, -2, 4, -6, 6, -3, 10, 10}, + }, + DcopyAns: DTwoVecAnswer{ + X: []float64{10, 15, -6, 3, 
14, 7}, + Y: []float64{14, -2, 4, -6, 6, -3, 10, 10}, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{7.372604823403701, 15, -2.479518890035003, 3, 16.069255112438693, 7}, + YAns: []float64{1.333806631923407, -2, 4, 8.879864079700745, 6, -3, -7.851413765553595, 10}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{9.4, 15, -6.1, 3, 11.8, 7}, + YAns: []float64{5.4, -2, 4, 2.9, 6, -3, -1, 10}, + }, + { + P: blas.DrotmParams{ + Flag: blas.OffDiagonal, + H: [4]float64{1, 0.1, -0.1, 1}, + }, + XAns: []float64{10.4, 15, -6.7, 3, 13.2, 7}, + YAns: []float64{9.4, -2, 4, 6.4, 6, -3, -3, 10}, + }, + { + P: blas.DrotmParams{ + Flag: blas.Diagonal, + H: [4]float64{0.5, -1, 1, 0.7}, + }, + XAns: []float64{1, 15, 4, 3, 15, 7}, + YAns: []float64{-8.4, -2, 4, 10.9, 6, -3, -12.8, 10}, + }, + }, + }, + { + Name: "MixedInc2", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: -2, + Incy: 3, + N: 3, + Panic: false, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{36, -2, 4, -5, 6, -3, 16, 10}, + }, + }, + DdotAns: 30, + DswapAns: DTwoVecAnswer{ + X: []float64{-4, 15, 7, 3, 8, 7}, + Y: []float64{14, -2, 4, -6, 6, -3, 10, 10}, + }, + DcopyAns: DTwoVecAnswer{ + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{14, -2, 4, -6, 6, -3, 10, 10}, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{7.372604823403701, 15, -2.479518890035003, 3, 16.069255112438693, 7}, + YAns: []float64{1.333806631923407, -2, 4, 8.879864079700745, 6, -3, -7.851413765553595, 10}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{9.4, 15, -6.1, 3, 
11.8, 7}, + YAns: []float64{5.4, -2, 4, 2.9, 6, -3, -1, 10}, + }, + { + P: blas.DrotmParams{ + Flag: blas.OffDiagonal, + H: [4]float64{1, 0.1, -0.1, 1}, + }, + XAns: []float64{10.4, 15, -6.7, 3, 13.2, 7}, + YAns: []float64{9.4, -2, 4, 6.4, 6, -3, -3, 10}, + }, + { + P: blas.DrotmParams{ + Flag: blas.Diagonal, + H: [4]float64{0.5, -1, 1, 0.7}, + }, + XAns: []float64{1, 15, 4, 3, 15, 7}, + YAns: []float64{-8.4, -2, 4, 10.9, 6, -3, -12.8, 10}, + }, + }, + }, + { + Name: "ZeroN", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: -2, + Incy: 3, + N: 0, + Panic: false, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + }, + DswapAns: DTwoVecAnswer{ + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + DcopyAns: DTwoVecAnswer{ + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + }, + }, + { + Name: "NegativeN", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: -2, + Incy: 3, + N: -3, + Panic: true, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{36, -2, 4, -5, 6, -3, 16, 10}, + }, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + }, + DrotmCases: []DrotmCase{ + 
{ + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{8.2, 13.7, -5.8, 2, 12, 6.6}, + YAns: []float64{5, 0.5, 1.4, 3.8, 4.4, -0.8}, + }, + }, + }, + { + Name: "ZeroIncX", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: 0, + Incy: 3, + N: 2, + Panic: true, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{36, -2, 4, -5, 6, -3, 16, 10}, + }, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{8.2, 13.7, -5.8, 2, 12, 6.6}, + YAns: []float64{5, 0.5, 1.4, 3.8, 4.4, -0.8}, + }, + }, + }, + { + Name: "ZeroIncY", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: 1, + Incy: 0, + N: 2, + Panic: true, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{36, -2, 4, -5, 6, -3, 16, 10}, + }, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{8.2, 13.7, -5.8, 2, 12, 6.6}, + YAns: []float64{5, 0.5, 1.4, 3.8, 4.4, -0.8}, + }, + }, + }, + { + Name: "OutOfBoundsX", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: 8, + Incy: 2, + N: 2, + Panic: true, + DaxpyCases: []DaxpyCase{ + { + Alpha: 
2, + Ans: []float64{36, -2, 4, -5, 6, -3, 16, 10}, + }, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{8.2, 13.7, -5.8, 2, 12, 6.6}, + YAns: []float64{5, 0.5, 1.4, 3.8, 4.4, -0.8}, + }, + }, + }, + { + Name: "OutOfBoundsY", + X: []float64{10, 15, -6, 3, 14, 7}, + Y: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + XTmp: []float64{0, 0, 0, 0, 0, 0}, + YTmp: []float64{0, 0, 0, 0, 0, 0, 0, 0}, + Incx: 2, + Incy: 8, + N: 2, + Panic: true, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{36, -2, 4, -5, 6, -3, 16, 10}, + }, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{10, 15, -6, 3, 14, 7}, + YAns: []float64{8, -2, 4, 7, 6, -3, -4, 10}, + }, + }, + }, + { + Name: "Empty", + X: []float64{}, + Y: []float64{}, + Incx: 1, + Incy: 1, + N: 0, + Panic: false, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{}, + }, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{}, + YAns: []float64{}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{}, + YAns: []float64{}, + }, + }, + }, + { + Name: "EmptyZeroIncX", + X: []float64{}, + Y: []float64{}, + Incx: 0, + Incy: 1, + N: 0, + Panic: true, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{}, + }, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), 
+ S: math.Sin(25 * math.Pi / 180), + XAns: []float64{}, + YAns: []float64{}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{}, + YAns: []float64{}, + }, + }, + }, + { + Name: "EmptyZeroIncY", + X: []float64{}, + Y: []float64{}, + Incx: 1, + Incy: 0, + N: 0, + Panic: true, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{}, + }, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{}, + YAns: []float64{}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{}, + YAns: []float64{}, + }, + }, + }, + { + Name: "EmptyReverse", + X: []float64{}, + Y: []float64{}, + Incx: -1, + Incy: -1, + N: 0, + Panic: false, + DaxpyCases: []DaxpyCase{ + { + Alpha: 2, + Ans: []float64{}, + }, + }, + DrotCases: []DrotCase{ + { + C: math.Cos(25 * math.Pi / 180), + S: math.Sin(25 * math.Pi / 180), + XAns: []float64{}, + YAns: []float64{}, + }, + }, + DrotmCases: []DrotmCase{ + { + P: blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.9, 0.1, -0.1, 0.5}, + }, + XAns: []float64{}, + YAns: []float64{}, + }, + }, + }, +} + +type Ddotter interface { + Ddot(n int, x []float64, incX int, y []float64, incY int) float64 +} + +func DdotTest(t *testing.T, d Ddotter) { + ddot := d.Ddot + for _, c := range DoubleTwoVectorCases { + dCopyTwoTmp(c.X, c.XTmp, c.Y, c.YTmp) + if c.Panic { + f := func() { ddot(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy) } + testpanics(f, c.Name, t) + continue + } + dot := ddot(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy) + if !dTolEqual(dot, c.DdotAns) { + t.Errorf("ddot: mismatch %v: expected %v, found %v", c.Name, c.DdotAns, dot) + } + } + + // check it works for 16-byte unaligned slices + x := []float64{1, 1, 1, 1, 1} + if n := ddot(4, x[:4], 1, x[1:], 1); n != 4 { + t.Errorf("ddot: mismatch Unaligned: 
expected %v, found %v", 4, n) + } + if n := ddot(2, x[:4], 2, x[1:], 2); n != 2 { + t.Errorf("ddot: mismatch Unaligned: expected %v, found %v", 2, n) + } + if n := ddot(2, x[:4], 3, x[1:], 3); n != 2 { + t.Errorf("ddot: mismatch Unaligned: expected %v, found %v", 2, n) + } +} + +type Dnrm2er interface { + Dnrm2(n int, x []float64, incX int) float64 +} + +func Dnrm2Test(t *testing.T, blasser Dnrm2er) { + dnrm2 := blasser.Dnrm2 + for _, c := range DoubleOneVectorCases { + if c.Panic { + f := func() { dnrm2(c.N, c.X, c.Incx) } + testpanics(f, c.Name, t) + continue + } + v := dnrm2(c.N, c.X, c.Incx) + if !dTolEqual(v, c.Dnrm2) { + t.Errorf("dnrm2: mismatch %v: expected %v, found %v", c.Name, c.Dnrm2, v) + } + } +} + +type Dasumer interface { + Dasum(n int, x []float64, incX int) float64 +} + +func DasumTest(t *testing.T, blasser Dasumer) { + dasum := blasser.Dasum + for _, c := range DoubleOneVectorCases { + if c.Panic { + f := func() { dasum(c.N, c.X, c.Incx) } + testpanics(f, c.Name, t) + continue + } + v := dasum(c.N, c.X, c.Incx) + if !dTolEqual(v, c.Dasum) { + t.Errorf("dasum: mismatch %v: expected %v, found %v", c.Name, c.Dasum, v) + } + } +} + +type Idamaxer interface { + Idamax(n int, x []float64, incX int) int +} + +func IdamaxTest(t *testing.T, blasser Idamaxer) { + idamax := blasser.Idamax + for _, c := range DoubleOneVectorCases { + if c.Panic { + f := func() { idamax(c.N, c.X, c.Incx) } + testpanics(f, c.Name, t) + continue + } + v := idamax(c.N, c.X, c.Incx) + if v != c.Idamax { + s := fmt.Sprintf("idamax: mismatch %v: expected %v, found %v", c.Name, c.Idamax, v) + if floats.HasNaN(c.X) { + log.Println(s) + } else { + t.Errorf(s) + } + } + } +} + +type Dswapper interface { + Dswap(n int, x []float64, incX int, y []float64, incY int) +} + +func DswapTest(t *testing.T, d Dswapper) { + dswap := d.Dswap + for _, c := range DoubleTwoVectorCases { + dCopyTwoTmp(c.X, c.XTmp, c.Y, c.YTmp) + if c.Panic { + f := func() { dswap(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy) } 
+ testpanics(f, c.Name, t) + continue + } + dswap(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy) + if !dSliceTolEqual(c.XTmp, c.DswapAns.X) { + t.Errorf("dswap: x mismatch %v: expected %v, found %v", c.Name, c.DswapAns.X, c.XTmp) + } + if !dSliceTolEqual(c.YTmp, c.DswapAns.Y) { + t.Errorf("dswap: y mismatch %v: expected %v, found %v", c.Name, c.DswapAns.Y, c.YTmp) + } + } +} + +type Dcopier interface { + Dcopy(n int, x []float64, incX int, y []float64, incY int) +} + +func DcopyTest(t *testing.T, d Dcopier) { + dcopy := d.Dcopy + for _, c := range DoubleTwoVectorCases { + dCopyTwoTmp(c.X, c.XTmp, c.Y, c.YTmp) + if c.Panic { + f := func() { dcopy(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy) } + testpanics(f, c.Name, t) + continue + } + dcopy(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy) + if !dSliceTolEqual(c.XTmp, c.DcopyAns.X) { + t.Errorf("dcopy: x mismatch %v: expected %v, found %v", c.Name, c.DcopyAns.X, c.XTmp) + } + if !dSliceTolEqual(c.YTmp, c.DcopyAns.Y) { + t.Errorf("dcopy: y mismatch %v: expected %v, found %v", c.Name, c.DcopyAns.Y, c.YTmp) + } + } +} + +type Daxpyer interface { + Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) +} + +func DaxpyTest(t *testing.T, d Daxpyer) { + daxpy := d.Daxpy + for _, c := range DoubleTwoVectorCases { + for _, kind := range c.DaxpyCases { + dCopyTwoTmp(c.X, c.XTmp, c.Y, c.YTmp) + if c.Panic { + f := func() { daxpy(c.N, kind.Alpha, c.XTmp, c.Incx, c.YTmp, c.Incy) } + testpanics(f, c.Name, t) + continue + } + daxpy(c.N, kind.Alpha, c.XTmp, c.Incx, c.YTmp, c.Incy) + if !dSliceTolEqual(c.YTmp, kind.Ans) { + t.Errorf("daxpy: mismatch %v: expected %v, found %v", c.Name, kind.Ans, c.YTmp) + } + } + } +} + +type DrotgTestStruct struct { + Name string + A, B float64 + C, S, R, Z float64 +} + +var DrotgTests = []DrotgTestStruct{ + { + Name: "ZeroAB", + C: 1, + }, + { + Name: "PosA_ZeroB", + A: 0.5, + C: 1, + R: 0.5, + }, + { + Name: "NegA_ZeroB", + A: -4.6, + C: 1, + R: -4.6, + }, + { + Name: "ZeroA_PosB", + B: 3, + S: 1, + R: 3, + 
Z: 1, + }, + { + Name: "ZeroA_NegB", + B: -0.3, + S: 1, + R: -0.3, + Z: 1, + }, + { + Name: "PosA_PosB_AGTB", + A: 5, + B: 0.3, + C: 0.99820484546577868593549038000, + S: 0.05989229072794672115612942280, + R: 5.00899191454727744602429072688, + Z: 0.05989229072794672115612942280, + }, + { + Name: "PosA_PosB_ALTB", + A: 3, + B: 4, + C: 3.0 / 5, + S: 4.0 / 5, + R: 5, + Z: 5.0 / 3.0, + }, + + { + Name: "PosA_NegB_AGTB", + A: 2.6, + B: -0.9, + C: 0.94498607344025815971847507095, + S: -0.32711056388316628605639521686, + R: 2.751363298439520872718790879655, + Z: -0.3271105638831662860563952168, + }, + { + Name: "PosA_NegB_ALTB", + A: 2.6, + B: -2.9, + C: -0.6675450157520258540548049558, + S: 0.7445694406464903756765132200, + R: -3.8948684188300893100043812234, + Z: 1 / -0.6675450157520258540548049558, + }, + { + Name: "NegA_PosB_AGTB", + A: -11.4, + B: 10.3, + C: 0.7419981952497362418487847947, + S: -0.6704018781642353764072353847, + R: -15.363918770938617534070671122, + Z: -0.6704018781642353764072353847, + }, + { + Name: "NegA_PosB_ALTB", + A: -1.4, + B: 10.3, + C: -0.1346838895922121112404717523, + S: 0.9908886162855605326977564640, + R: 10.394710193170370442523552032, + Z: 1 / -0.1346838895922121112404717523, + }, + { + Name: "NegA_NegB_AGTB", + A: -11.4, + B: -10.3, + C: 0.7419981952497362418487847947, + S: 0.6704018781642353764072353847, + R: -15.363918770938617534070671122, + Z: 0.6704018781642353764072353847, + }, + { + Name: "NegA_NegB_ALTB", + A: -1.4, + B: -10.3, + C: 0.1346838895922121112404717523, + S: 0.9908886162855605326977564640, + R: -10.394710193170370442523552032, + Z: 1 / 0.1346838895922121112404717523, + }, +} + +type Drotger interface { + Drotg(a, b float64) (c, s, r, z float64) +} + +func DrotgTest(t *testing.T, d Drotger) { + drotg := d.Drotg + for _, test := range DrotgTests { + c, s, r, z := drotg(test.A, test.B) + if !dTolEqual(c, test.C) { + t.Errorf("drotg: c mismatch %v: expected %v, found %v", test.Name, test.C, c) + } + if !dTolEqual(s, 
test.S) { + t.Errorf("drotg: s mismatch %v: expected %v, found %v", test.Name, test.S, s) + } + if !dTolEqual(r, test.R) { + t.Errorf("drotg: r mismatch %v: expected %v, found %v", test.Name, test.R, r) + } + if !dTolEqual(z, test.Z) { + t.Errorf("drotg: z mismatch %v: expected %v, found %v", test.Name, test.Z, z) + } + } +} + +type DrotmgTestStruct struct { + Name string + D1, D2, X1, Y1 float64 + P *blas.DrotmParams + Rd1, Rd2, Rx1 float64 +} + +var DrotmgTests = []DrotmgTestStruct{ + { + Name: "NegD1", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + }, + D1: -4, + D2: 6, + X1: 8, + Y1: -4, + }, + { + Name: "ZeroD2", + P: &blas.DrotmParams{ + Flag: blas.Identity, + }, + D1: 4, + X1: 8, + Y1: -5, + Rd1: 4, + Rx1: 8, + }, + { + Name: "ZeroY1", + P: &blas.DrotmParams{ + Flag: blas.Identity, + }, + D1: 4, + D2: -6, + X1: 8, + Rd1: 4, + Rd2: -6, + Rx1: 8, + }, + { + Name: "NegQ2_and_AQ1_LT_AQ2", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + }, + D1: 8, + D2: -6, + X1: 4, + Y1: 8, + Rd1: 0, + Rd2: 0, + Rx1: 0, + }, + { + Name: "ZeroD1", + P: &blas.DrotmParams{ + Flag: blas.Diagonal, + H: [4]float64{0, 0, 0, 2}, + }, + D1: 0, + D2: 2, + X1: 8, + Y1: 4, + Rd1: 2, + Rd2: 0, + Rx1: 4, + }, + { + Name: "AbsQ1_GT_AbsQU__D2_Pos", + P: &blas.DrotmParams{ + Flag: blas.OffDiagonal, + H: [4]float64{0, -0.625, 0.9375, 0}, + }, + D1: 2, + D2: 3, + X1: 8, + Y1: 5, + Rd1: 1.2610837438423645, + Rd2: 1.8916256157635467, + Rx1: 12.6875, + }, + { + Name: "AbsQ1_GT_AbsQU__D2_Neg", + P: &blas.DrotmParams{ + Flag: blas.OffDiagonal, + H: [4]float64{0, -0.625, -0.9375, 0}, + }, + D1: 2, + D2: -3, + X1: 8, + Y1: 5, + Rd1: 4.830188679245283, + Rd2: -7.245283018867925, + Rx1: 3.3125, + }, + { + Name: "AbsQ1_LT_AbsQU__D2_Pos", + P: &blas.DrotmParams{ + Flag: blas.Diagonal, + H: [4]float64{5.0 / 12, 0, 0, 0.625}, + }, + D1: 2, + D2: 3, + X1: 5, + Y1: 8, + Rd1: 2.3801652892561984, + Rd2: 1.586776859504132, + Rx1: 121.0 / 12, + }, + { + Name: "D1=D2_X1=X2", + P: &blas.DrotmParams{ + Flag: 
blas.Diagonal, + H: [4]float64{1, 0, 0, 1}, + }, + D1: 2, + D2: 2, + X1: 8, + Y1: 8, + Rd1: 1, + Rd2: 1, + Rx1: 16, + }, + { + Name: "RD1_Big_RD2_Big_Flag_0", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{4096, -3584, 1792, 4096}, + }, + D1: 1600000000, + D2: 800000000, + X1: 8, + Y1: 7, + Rd1: 68.96627824858757, + Rd2: 34.483139124293785, + Rx1: 45312, + }, + { + Name: "RD1_Big_RD2_Big_Flag_1", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{2340.5714285714284, -4096, 4096, 4681.142857142857}, + }, + D1: 800000000, + D2: 1600000000, + X1: 8, + Y1: 7, + Rd1: 57.6914092640818, + Rd2: 28.8457046320409, + Rx1: 47396.57142857142, + }, + { + Name: "RD1_Big_RD2_Med_Flag_0", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{4096, -1, 0.0004096, 1}, + }, + D1: 20000000, + D2: 2, + X1: 8, + Y1: 8, + Rd1: 1.1920927762985347, + Rd2: 1.9999998000000199, + Rx1: 32768.0032768, + }, + { + Name: "RD1_Big_RD2_Med_Flag_1", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{4.096e-17, -1, 4096, 1e-10}, + }, + D1: 2, + D2: 20000000000, + X1: 8, + Y1: 80000000000, + Rd1: 1192.0928955078125, + Rd2: 2, + Rx1: 3.2768e+14, + }, + + // TODO: Add D1 big, D2 small, Flag = 0 + { + Name: "D1_Big_D2_Small_Flag_1", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{2.8671999999999997e-26, -0.000244140625, 4096, 2.44140625e-16}, + }, + D1: 0.000000014, + D2: 2000000000, + X1: 0.000008, + Y1: 8000000, + Rd1: 119.20928955078125, + Rd2: 0.234881024, + Rx1: 3.2768e+10, + }, + + { + Name: "RD1_Med_RD2_Big_Flag_0", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{1, -0.0004096, 1000, 4096}, + }, + D1: 2, + D2: 20000000000, + X1: 80000000, + Y1: 8, + Rd1: 1.9998000199980002, + Rd2: 1191.9736981379988, + Rx1: 8.0008e+07, + }, + { + Name: "D1_Med_D2_Big_Flag_1", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{50, -4096, 1, 4.096e-06}, + }, + D1: 20000000000, + D2: 0.4, + X1: 80000000, + Y1: 
80000000000000000, + Rd1: 0.39999998000000103, + Rd2: 1192.092835903171, + Rx1: 8.0000004e+16, + }, + { + Name: "RD1_Med_RD2_Small_Flag_0", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{1, -0.0007233796296296296, 1.1111111111111111e-10, 0.000244140625}, + }, + D1: 1.2, + D2: 0.000000000045, + X1: 2.7, + Y1: 8, + Rd1: 1.1999999996049382, + Rd2: 0.0007549747197514486, + Rx1: 2.700000000888889, + }, + { + Name: "RD1_Med_RD2_Small_Flag_1", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.0002197265625, -1, 0.000244140625, 3.375e-11}, + }, + D1: 1.2, + D2: 0.000000000045, + X1: 2.7, + Y1: 80000000000, + Rd1: 0.0007549747199770676, + Rd2: 1.19999999996355, + Rx1: 1.9531250000593264e+07, + }, + // TODO: Add Small, Big, 0 case + { + Name: "D1_Small_D2_Big_Flag_1", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{2.3731773997569866e+10, -1.6777216e+07, 0.000244140625, 1.6777216e-07}, + }, + D1: 120000000000000000, + D2: 0.000000000012345, + X1: 0.08, + Y1: 8000000000000, + Rd1: 0.00010502490698765249, + Rd2: 216.1836123957717, + Rx1: 3.8516669198055897e+09, + }, + { + Name: "RD1_Small_RD2_Med_Flag_0", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.000244140625, -1e-08, 0.24414062499999997, 1}, + }, + D1: 0.0000000002, + D2: 20, + X1: 0.8, + Y1: 0.000000008, + Rd1: 0.003355409645903541, + Rd2: 19.99980000199998, + Rx1: 0.000195314453125, + }, + { + Name: "RD1_Small_RD2_Med_Flag_1", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.0012207031250000002, -1, 0.000244140625, 1e-09}, + }, + D1: 0.02, + D2: 0.000000000004, + X1: 0.008, + Y1: 8000000, + Rd1: 6.710886366445568e-05, + Rd2: 0.019999999900000003, + Rx1: 1953.125009765625, + }, + // TODO: Add Small, Small, 0 case + // TODO: Add Small, Small, 1 case +} + +type Drotmger interface { + Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) +} + +func DrotmgTest(t *testing.T, d Drotmger) { + for _, test := range 
DrotmgTests { + + p, rd1, rd2, rx1 := d.Drotmg(test.D1, test.D2, test.X1, test.Y1) + + if p.Flag != test.P.Flag { + t.Errorf("drotmg flag mismatch %v: expected %v, found %v", test.Name, test.P.Flag, p.Flag) + } + for i, val := range p.H { + if !dTolEqual(test.P.H[i], val) { + t.Errorf("drotmg H mismatch %v: expected %v, found %v", test.Name, test.P.H, p.H) + break + } + } + if !dTolEqual(rd1, test.Rd1) { + t.Errorf("drotmg rd1 mismatch %v: expected %v, found %v", test.Name, test.Rd1, rd1) + } + if !dTolEqual(rd2, test.Rd2) { + t.Errorf("drotmg rd2 mismatch %v: expected %v, found %v", test.Name, test.Rd2, rd2) + } + if !dTolEqual(rx1, test.Rx1) { + t.Errorf("drotmg rx1 mismatch %v: expected %v, found %v", test.Name, test.Rx1, rx1) + } + } +} + +type Droter interface { + Drot(n int, x []float64, incX int, y []float64, incY int, c, s float64) +} + +func DrotTest(t *testing.T, d Droter) { + drot := d.Drot + for _, c := range DoubleTwoVectorCases { + for _, kind := range c.DrotCases { + dCopyTwoTmp(c.X, c.XTmp, c.Y, c.YTmp) + if c.Panic { + f := func() { drot(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy, kind.C, kind.S) } + testpanics(f, c.Name, t) + continue + } + drot(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy, kind.C, kind.S) + if !dSliceTolEqual(c.XTmp, kind.XAns) { + t.Errorf("drot: x mismatch %v: expected %v, found %v", c.Name, kind.XAns, c.XTmp) + } + if !dSliceTolEqual(c.YTmp, kind.YAns) { + t.Errorf("drot: y mismatch %v: expected %v, found %v", c.Name, kind.YAns, c.YTmp) + } + } + } +} + +type Drotmer interface { + Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) +} + +func DrotmTest(t *testing.T, d Drotmer) { + drotm := d.Drotm + for _, c := range DoubleTwoVectorCases { + for _, kind := range c.DrotmCases { + dCopyTwoTmp(c.X, c.XTmp, c.Y, c.YTmp) + if c.Panic { + f := func() { drotm(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy, kind.P) } + testpanics(f, c.Name+", "+kind.Name, t) + continue + } + drotm(c.N, c.XTmp, c.Incx, c.YTmp, c.Incy, kind.P) + if 
!dSliceTolEqual(c.XTmp, kind.XAns) { + t.Errorf("drotm: mismatch %v: expected %v, found %v", c.Name, kind.XAns, c.XTmp) + } + if !dSliceTolEqual(c.YTmp, kind.YAns) { + t.Errorf("drotm: mismatch %v: expected %v, found %v", c.Name, kind.YAns, c.YTmp) + } + } + } +} + +type Dscaler interface { + Dscal(n int, alpha float64, x []float64, incX int) +} + +func DscalTest(t *testing.T, blasser Dscaler) { + dscal := blasser.Dscal + for _, c := range DoubleOneVectorCases { + for _, kind := range c.DscalCases { + xTmp := make([]float64, len(c.X)) + copy(xTmp, c.X) + if c.Panic { + f := func() { dscal(c.N, kind.Alpha, xTmp, c.Incx) } + testpanics(f, c.Name, t) + continue + } + dscal(c.N, kind.Alpha, xTmp, c.Incx) + if !dSliceTolEqual(xTmp, kind.Ans) { + t.Errorf("dscal: mismatch %v, %v: expected %v, found %v", c.Name, kind.Name, kind.Ans, xTmp) + } + } + } +} diff --git a/blas/testblas/level2bench.go b/blas/testblas/level2bench.go new file mode 100644 index 00000000..b1d29ccd --- /dev/null +++ b/blas/testblas/level2bench.go @@ -0,0 +1,60 @@ +package testblas + +import ( + "math/rand" + "testing" + + "github.com/gonum/blas" +) + +func DgemvBenchmark(b *testing.B, blasser Dgemver, tA blas.Transpose, m, n, incX, incY int) { + var lenX, lenY int + if tA == blas.NoTrans { + lenX = n + lenY = m + } else { + lenX = m + lenY = n + } + xr := make([]float64, lenX) + for i := range xr { + xr[i] = rand.Float64() + } + x := makeIncremented(xr, incX, 0) + yr := make([]float64, lenY) + for i := range yr { + yr[i] = rand.Float64() + } + y := makeIncremented(yr, incY, 0) + a := make([]float64, m*n) + for i := range a { + a[i] = rand.Float64() + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + blasser.Dgemv(tA, m, n, 2, a, n, x, incX, 3, y, incY) + } +} + +func DgerBenchmark(b *testing.B, blasser Dgerer, m, n, incX, incY int) { + xr := make([]float64, m) + for i := range xr { + xr[i] = rand.Float64() + } + x := makeIncremented(xr, incX, 0) + yr := make([]float64, n) + for i := range yr { + 
yr[i] = rand.Float64() + } + y := makeIncremented(yr, incY, 0) + a := make([]float64, m*n) + for i := range a { + a[i] = rand.Float64() + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + blasser.Dger(m, n, 2, x, incX, y, incY, a, n) + } +}