diff --git a/goblas/blasops.go b/goblas/blasops.go new file mode 100644 index 00000000..a94feaa8 --- /dev/null +++ b/goblas/blasops.go @@ -0,0 +1,9 @@ +package goblas + +import "github.com/gonum/blas" + +type Blas struct{} + +var ( + _ blas.Float64 = Blas{} +) diff --git a/goblas/dasum.go b/goblas/dasum.go new file mode 100644 index 00000000..7f7421a5 --- /dev/null +++ b/goblas/dasum.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Dasum(N int, X []float64, incX int) float64 { + return blas.Dasum(N, X, incX) +} diff --git a/goblas/daxpy.go b/goblas/daxpy.go new file mode 100644 index 00000000..193692fe --- /dev/null +++ b/goblas/daxpy.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Daxpy(N int, alpha float64, X []float64, incX int, Y []float64, incY int) { + blas.Daxpy(N, alpha, X, incX, Y, incY) +} diff --git a/goblas/dcopy.go b/goblas/dcopy.go new file mode 100644 index 00000000..98bba51f --- /dev/null +++ b/goblas/dcopy.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Dcopy(N int, X []float64, incX int, Y []float64, incY int) { + blas.Dcopy(N, X, incX, Y, incY) +} diff --git a/goblas/ddot.go b/goblas/ddot.go new file mode 100644 index 00000000..d9691c7b --- /dev/null +++ b/goblas/ddot.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Ddot(N int, X []float64, incX int, Y []float64, incY int) float64 { + return blas.Ddot(N, X, incX, Y, incY) +} diff --git a/goblas/dgemm.go b/goblas/dgemm.go new file mode 100644 index 00000000..03aba30b --- /dev/null +++ b/goblas/dgemm.go @@ -0,0 +1,45 @@ +package goblas + +import "github.com/gonum/blas" +import level1 "github.com/ziutek/blas" + +func (Blas) Dgemm(o blas.Order, tA, tB blas.Transpose, m int, n int, k int, + alpha float64, a []float64, lda int, b []float64, ldb int, + beta float64, c []float64, ldc int) { + + var inner, outer, veclen int + if o == blas.ColMajor { + outer = n + veclen = m + } else { + veclen = n + outer = m + a, b = b, a + ldb, lda = lda, ldb + tA, tB = tB, tA + } + inner = k + + for j := 0; j < outer; j++ { + cj := c[j*ldc:] + if beta != 1 { + level1.Dscal(veclen, beta, cj, 1) + } + + for l := 0; l < inner; l++ { + al := a[l*lda:] + if tA == blas.Trans { + al = a[l:] + } + blj := b[j*ldb+l] + if tB == blas.Trans { + blj = b[l*ldb+j] + } + if tA == blas.NoTrans { + level1.Daxpy(veclen, blj*alpha, al, 1, cj, 1) + } else { + level1.Daxpy(veclen, blj*alpha, al, lda, cj, 1) + } + } + } +} diff --git a/goblas/dgemv.go b/goblas/dgemv.go new file mode 100644 index 00000000..41870d77 --- /dev/null +++ b/goblas/dgemv.go @@ -0,0 +1,22 @@ +package goblas + +import "github.com/gonum/blas" +import level1 "github.com/ziutek/blas" + +// Performs: y = alpha * A * x + beta * y or y = alpha * A^T * x + beta * y +func (Blas) Dgemv(o blas.Order, tA blas.Transpose, m, n int, alpha float64, a []float64, + lda int, x []float64, incX int, beta float64, y []float64, incY int) { + + Blas{}.Dscal(m, beta, y, incY) + if (o == blas.ColMajor && tA == blas.NoTrans) || + (o == blas.RowMajor && tA == blas.Trans) { + for c := 0; c < n; c++ { + level1.Daxpy(m, alpha*x[incX*c], a[c*lda:], 1, y, incY) + } + } else { + for r := 0; r < m; r++ { + y[r*incY] += alpha * level1.Ddot(n, a[r*lda:], 1, x, incX) + } + } + +} diff --git a/goblas/dnrm2.go b/goblas/dnrm2.go new file mode 100644 index 00000000..5fd860dd --- /dev/null +++ b/goblas/dnrm2.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Dnrm2(N int, X []float64, incX int) float64 { + return blas.Dnrm2(N, X, incX) +} diff --git a/goblas/doc.go b/goblas/doc.go new file mode 100644 index 00000000..44f4929f --- /dev/null +++ b/goblas/doc.go @@ -0,0 +1,2 @@ +// Go implementation of BLAS (Basic Linear Algebra Subprograms) +package goblas diff --git a/goblas/drot.go b/goblas/drot.go new file mode 100644 index 00000000..b0eedd7c --- /dev/null +++ b/goblas/drot.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Drot(N int, X []float64, incX int, Y []float64, incY int, c, s float64) { + blas.Drot(N, X, incX, Y, incY, c, s) +} diff --git a/goblas/drotg.go b/goblas/drotg.go new file mode 100644 index 00000000..35bb81d0 --- /dev/null +++ b/goblas/drotg.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Drotg(a, b float64) (c, s, r, z float64) { + return blas.Drotg(a, b) +} diff --git a/goblas/drotmg.go b/goblas/drotmg.go new file mode 100644 index 00000000..b6c496cb --- /dev/null +++ b/goblas/drotmg.go @@ -0,0 +1,103 @@ +package goblas + +import "math" + +import "github.com/gonum/blas" + +// Compute a modified Givens transformation +func (Blas) Drotmg(d1, d2, x1, y1 float64) (p *blas.DrotmParams, rd1, rd2, rx1 float64) { + var p1, p2, q1, q2, u float64 + + gam := 4096.0 + gamsq := 16777216.0 + rgamsq := 5.9604645e-8 + + rd1 = d1 + rd2 = d2 + rx1 = x1 + + if d1 < 0 { + p.Flag = -1 + } else { + p2 = rd2 * y1 + if p2 == 0 { + p.Flag = -2 + return + } + p1 = rd1 * x1 + q2 = p2 * y1 + q1 = p1 * x1 + if math.Abs(q1) > math.Abs(q2) { + p.H[1] = -y1 / x1 + p.H[2] = p2 / p1 + u = 1 - p.H[2]*p.H[1] + if u > 0 { + p.Flag = 0 + rd1 /= u + rd2 /= u + rx1 /= u + } + } else { + if q2 < 0 { + p.Flag = -1 + rd1 = 0 + rd2 = 0 + rx1 = 0 + } else { + p.Flag = 1 + p.H[0] = p1 / p2 + p.H[3] = x1 / y1 + u = 1 + p.H[0] + p.H[3] + rd1, rd2 = rd2/u, rd1/u + rx1 = y1 / u + } + } + if rd1 != 0 { + for rd1 <= rgamsq || rd1 >= gamsq { + if p.Flag == 0 { + p.H[0] = 1 + p.H[3] = 1 + p.Flag = -1 + } else { + p.H[1] = -1 + p.H[2] = 1 + p.Flag = -1 + } + if rd1 <= rgamsq { + rd1 *= gam * gam + rx1 /= gam + p.H[0] /= gam + p.H[2] /= gam + } else { + rd1 /= gam * gam + rx1 *= gam + p.H[0] *= gam + p.H[2] *= gam + } + } + } + if rd2 != 0 { + for math.Abs(rd2) <= rgamsq || math.Abs(rd2) >= gamsq { + if p.Flag == 0 { + p.H[0] = 1 + p.H[3] = 1 + p.Flag = -1 + } else { + p.H[1] = -1 + p.H[2] = 1 + p.Flag = -1 + } + if math.Abs(rd2) <= rgamsq { + rd2 *= gam * gam + p.H[1] /= gam + p.H[3] /= gam + } else { + rd2 /= gam * gam + p.H[1] *= gam + p.H[3] *= gam + } + } + } + } + return +} diff --git a/goblas/dscal.go b/goblas/dscal.go new file mode 100644 index 00000000..93f7583c --- /dev/null +++ b/goblas/dscal.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Dscal(N int, alpha float64, X []float64, incX int) { + blas.Dscal(N, alpha, X, incX) +} diff --git a/goblas/dswap.go b/goblas/dswap.go new file mode 100644 index 00000000..dcf18059 --- /dev/null +++ b/goblas/dswap.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Dswap(N int, X []float64, incX int, Y []float64, incY int) { + blas.Dswap(N, X, incX, Y, incY) +} diff --git a/goblas/idamax.go b/goblas/idamax.go new file mode 100644 index 00000000..b15833aa --- /dev/null +++ b/goblas/idamax.go @@ -0,0 +1,7 @@ +package goblas + +import "github.com/ziutek/blas" + +func (Blas) Idamax(N int, X []float64, incX int) int { + return blas.Idamax(N, X, incX) +} diff --git a/goblas/stubs.go b/goblas/stubs.go new file mode 100644 index 00000000..7f406920 --- /dev/null +++ b/goblas/stubs.go @@ -0,0 +1,49 @@ +package goblas + +import "github.com/gonum/blas" + +func (Blas) Drotm(n int, x []float64, incX int, y []float64, incY int, p *blas.DrotmParams) { +} + +func (Blas) Dgbmv(o blas.Order, tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { +} +func (Blas) Dtrmv(o blas.Order, ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { +} +func (Blas) Dtbmv(o blas.Order, ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { +} +func (Blas) Dtpmv(o blas.Order, ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { +} +func (Blas) Dtrsv(o blas.Order, ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { +} +func (Blas) Dtbsv(o blas.Order, ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { +} +func (Blas) Dtpsv(o blas.Order, ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { +} +func (Blas) Dsymv(o blas.Order, ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { +} +func (Blas) Dsbmv(o blas.Order, ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { +} +func (Blas) Dspmv(o blas.Order, ul blas.Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int) { +} +func (Blas) Dger(o blas.Order, m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { +} +func (Blas) Dsyr(o blas.Order, ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) { +} +func (Blas) Dspr(o blas.Order, ul blas.Uplo, n int, alpha float64, x []float64, incX int, ap []float64) { +} +func (Blas) Dsyr2(o blas.Order, ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { +} +func (Blas) Dspr2(o blas.Order, ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64) { +} + +// Level 3 routines. +func (Blas) Dsymm(o blas.Order, s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { +} +func (Blas) Dsyrk(o blas.Order, ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) { +} +func (Blas) Dsyr2k(o blas.Order, ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { +} +func (Blas) Dtrmm(o blas.Order, s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { +} +func (Blas) Dtrsm(o blas.Order, s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { +}