// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT. // Copyright ©2014 The gonum Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package native import ( "gonum.org/v1/gonum/blas" "gonum.org/v1/gonum/internal/asm/f32" ) var _ blas.Float32Level2 = Implementation{} // Sgemv computes // y = alpha * a * x + beta * y if tA = blas.NoTrans // y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if m < 0 { panic(mLT0) } if n < 0 { panic(nLT0) } if lda < max(1, n) { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } // Set up indexes lenX := m lenY := n if tA == blas.NoTrans { lenX = n lenY = m } if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { panic(badX) } if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { panic(badY) } if lda*(m-1)+n > len(a) || lda < max(1, n) { panic(badLdA) } // Quick return if possible if m == 0 || n == 0 || (alpha == 0 && beta == 1) { return } var kx, ky int if incX > 0 { kx = 0 } else { kx = -(lenX - 1) * incX } if incY > 0 { ky = 0 } else { ky = -(lenY - 1) * incY } // First form y := beta * y if incY > 0 { Implementation{}.Sscal(lenY, beta, y, incY) } else { Implementation{}.Sscal(lenY, beta, y, -incY) } if alpha == 0 { return } // Form y := alpha * A * x + y if tA == blas.NoTrans { if incX == 1 && incY == 1 { for i := 0; i < m; i++ { y[i] += alpha * f32.DotUnitary(a[lda*i:lda*i+n], x) } return } iy := ky for i := 0; i < m; i++ { y[iy] += alpha * f32.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0) iy += incY } return } // Cases where a is transposed. if incX == 1 && incY == 1 { for i := 0; i < m; i++ { tmp := alpha * x[i] if tmp != 0 { f32.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y) } } return } ix := kx for i := 0; i < m; i++ { tmp := alpha * x[ix] if tmp != 0 { f32.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky)) } ix += incX } } // Sger performs the rank-one operation // A += alpha * x * y^T // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { // Check inputs if m < 0 { panic("m < 0") } if n < 0 { panic(negativeN) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) { panic(badX) } if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { panic(badY) } if lda*(m-1)+n > len(a) || lda < max(1, n) { panic(badLdA) } if lda < max(1, n) { panic(badLdA) } // Quick return if possible if m == 0 || n == 0 || alpha == 0 { return } var ky, kx int if incY > 0 { ky = 0 } else { ky = -(n - 1) * incY } if incX > 0 { kx = 0 } else { kx = -(m - 1) * incX } if incX == 1 && incY == 1 { x = x[:m] y = y[:n] for i, xv := range x { tmp := alpha * xv if tmp != 0 { atmp := a[i*lda : i*lda+n] f32.AxpyUnitaryTo(atmp, tmp, y, atmp) } } return } ix := kx for i := 0; i < m; i++ { tmp := alpha * x[ix] if tmp != 0 { f32.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0) } ix += incX } } // Sgbmv computes // y = alpha * A * x + beta * y if tA == blas.NoTrans // y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans // where a is an m×n band matrix kL subdiagonals and kU super-diagonals, and // m and n refer to the size of the full dense matrix it represents. // x and y are vectors, and alpha and beta are scalars. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if m < 0 { panic(mLT0) } if n < 0 { panic(nLT0) } if kL < 0 { panic(kLLT0) } if kL < 0 { panic(kULT0) } if lda < kL+kU+1 { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } // Set up indexes lenX := m lenY := n if tA == blas.NoTrans { lenX = n lenY = m } if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { panic(badX) } if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { panic(badY) } if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 { panic(badLdA) } // Quick return if possible if m == 0 || n == 0 || (alpha == 0 && beta == 1) { return } var kx, ky int if incX > 0 { kx = 0 } else { kx = -(lenX - 1) * incX } if incY > 0 { ky = 0 } else { ky = -(lenY - 1) * incY } // First form y := beta * y if incY > 0 { Implementation{}.Sscal(lenY, beta, y, incY) } else { Implementation{}.Sscal(lenY, beta, y, -incY) } if alpha == 0 { return } // i and j are indices of the compacted banded matrix. // off is the offset into the dense matrix (off + j = densej) ld := min(m, n) nCol := kU + 1 + kL if tA == blas.NoTrans { iy := ky if incX == 1 { for i := 0; i < m; i++ { l := max(0, kL-i) u := min(nCol, ld+kL-i) off := max(0, i-kL) atmp := a[i*lda+l : i*lda+u] xtmp := x[off : off+u-l] var sum float32 for j, v := range atmp { sum += xtmp[j] * v } y[iy] += sum * alpha iy += incY } return } for i := 0; i < m; i++ { l := max(0, kL-i) u := min(nCol, ld+kL-i) off := max(0, i-kL) atmp := a[i*lda+l : i*lda+u] jx := kx var sum float32 for _, v := range atmp { sum += x[off*incX+jx] * v jx += incX } y[iy] += sum * alpha iy += incY } return } if incX == 1 { for i := 0; i < m; i++ { l := max(0, kL-i) u := min(nCol, ld+kL-i) off := max(0, i-kL) atmp := a[i*lda+l : i*lda+u] tmp := alpha * x[i] jy := ky for _, v := range atmp { y[jy+off*incY] += tmp * v jy += incY } } return } ix := kx for i := 0; i < m; i++ { l := max(0, kL-i) u := min(nCol, ld+kL-i) off := max(0, i-kL) atmp := a[i*lda+l : i*lda+u] tmp := alpha * x[ix] jy := ky for _, v := range atmp { y[jy+off*incY] += tmp * v jy += incY } ix += incX } } // Strmv computes // x = A * x if tA == blas.NoTrans // x = A^T * x if tA == blas.Trans or blas.ConjTrans // A is an n×n Triangular matrix and x is a vector. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if d != blas.NonUnit && d != blas.Unit { panic(badDiag) } if n < 0 { panic(nLT0) } if lda < n { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if lda*(n-1)+n > len(a) || lda < max(1, n) { panic(badLdA) } if n == 0 { return } nonUnit := d != blas.Unit if n == 1 { if nonUnit { x[0] *= a[0] } return } var kx int if incX <= 0 { kx = -(n - 1) * incX } if tA == blas.NoTrans { if ul == blas.Upper { if incX == 1 { for i := 0; i < n; i++ { ilda := i * lda var tmp float32 if nonUnit { tmp = a[ilda+i] * x[i] } else { tmp = x[i] } xtmp := x[i+1:] x[i] = tmp + f32.DotUnitary(a[ilda+i+1:ilda+n], xtmp) } return } ix := kx for i := 0; i < n; i++ { ilda := i * lda var tmp float32 if nonUnit { tmp = a[ilda+i] * x[ix] } else { tmp = x[ix] } x[ix] = tmp + f32.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) ix += incX } return } if incX == 1 { for i := n - 1; i >= 0; i-- { ilda := i * lda var tmp float32 if nonUnit { tmp += a[ilda+i] * x[i] } else { tmp = x[i] } x[i] = tmp + f32.DotUnitary(a[ilda:ilda+i], x) } return } ix := kx + (n-1)*incX for i := n - 1; i >= 0; i-- { ilda := i * lda var tmp float32 if nonUnit { tmp = a[ilda+i] * x[ix] } else { tmp = x[ix] } x[ix] = tmp + f32.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) ix -= incX } return } // Cases where a is transposed. if ul == blas.Upper { if incX == 1 { for i := n - 1; i >= 0; i-- { ilda := i * lda xi := x[i] f32.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) if nonUnit { x[i] *= a[ilda+i] } } return } ix := kx + (n-1)*incX for i := n - 1; i >= 0; i-- { ilda := i * lda xi := x[ix] f32.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) if nonUnit { x[ix] *= a[ilda+i] } ix -= incX } return } if incX == 1 { for i := 0; i < n; i++ { ilda := i * lda xi := x[i] f32.AxpyUnitary(xi, a[ilda:ilda+i], x) if nonUnit { x[i] *= a[i*lda+i] } } return } ix := kx for i := 0; i < n; i++ { ilda := i * lda xi := x[ix] f32.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) if nonUnit { x[ix] *= a[ilda+i] } ix += incX } } // Strsv solves // A * x = b if tA == blas.NoTrans // A^T * x = b if tA == blas.Trans or blas.ConjTrans // A is an n×n triangular matrix and x is a vector. // At entry to the function, x contains the values of b, and the result is // stored in place into x. // // No test for singularity or near-singularity is included in this // routine. Such tests must be performed before calling this routine. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { // Test the input parameters // Verify inputs if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if d != blas.NonUnit && d != blas.Unit { panic(badDiag) } if n < 0 { panic(nLT0) } if lda*(n-1)+n > len(a) || lda < max(1, n) { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } // Quick return if possible if n == 0 { return } if n == 1 { if d == blas.NonUnit { x[0] /= a[0] } return } var kx int if incX < 0 { kx = -(n - 1) * incX } nonUnit := d == blas.NonUnit if tA == blas.NoTrans { if ul == blas.Upper { if incX == 1 { for i := n - 1; i >= 0; i-- { var sum float32 atmp := a[i*lda+i+1 : i*lda+n] for j, v := range atmp { jv := i + j + 1 sum += x[jv] * v } x[i] -= sum if nonUnit { x[i] /= a[i*lda+i] } } return } ix := kx + (n-1)*incX for i := n - 1; i >= 0; i-- { var sum float32 jx := ix + incX atmp := a[i*lda+i+1 : i*lda+n] for _, v := range atmp { sum += x[jx] * v jx += incX } x[ix] -= sum if nonUnit { x[ix] /= a[i*lda+i] } ix -= incX } return } if incX == 1 { for i := 0; i < n; i++ { var sum float32 atmp := a[i*lda : i*lda+i] for j, v := range atmp { sum += x[j] * v } x[i] -= sum if nonUnit { x[i] /= a[i*lda+i] } } return } ix := kx for i := 0; i < n; i++ { jx := kx var sum float32 atmp := a[i*lda : i*lda+i] for _, v := range atmp { sum += x[jx] * v jx += incX } x[ix] -= sum if nonUnit { x[ix] /= a[i*lda+i] } ix += incX } return } // Cases where a is transposed. if ul == blas.Upper { if incX == 1 { for i := 0; i < n; i++ { if nonUnit { x[i] /= a[i*lda+i] } xi := x[i] atmp := a[i*lda+i+1 : i*lda+n] for j, v := range atmp { jv := j + i + 1 x[jv] -= v * xi } } return } ix := kx for i := 0; i < n; i++ { if nonUnit { x[ix] /= a[i*lda+i] } xi := x[ix] jx := kx + (i+1)*incX atmp := a[i*lda+i+1 : i*lda+n] for _, v := range atmp { x[jx] -= v * xi jx += incX } ix += incX } return } if incX == 1 { for i := n - 1; i >= 0; i-- { if nonUnit { x[i] /= a[i*lda+i] } xi := x[i] atmp := a[i*lda : i*lda+i] for j, v := range atmp { x[j] -= v * xi } } return } ix := kx + (n-1)*incX for i := n - 1; i >= 0; i-- { if nonUnit { x[ix] /= a[i*lda+i] } xi := x[ix] jx := kx atmp := a[i*lda : i*lda+i] for _, v := range atmp { x[jx] -= v * xi jx += incX } ix -= incX } } // Ssymv computes // y = alpha * A * x + beta * y, // where a is an n×n symmetric matrix, x and y are vectors, and alpha and // beta are scalars. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { // Check inputs if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if n < 0 { panic(negativeN) } if lda > 1 && lda < n { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { panic(badY) } if lda*(n-1)+n > len(a) || lda < max(1, n) { panic(badLdA) } // Quick return if possible if n == 0 || (alpha == 0 && beta == 1) { return } // Set up start points var kx, ky int if incX > 0 { kx = 0 } else { kx = -(n - 1) * incX } if incY > 0 { ky = 0 } else { ky = -(n - 1) * incY } // Form y = beta * y if beta != 1 { if incY > 0 { Implementation{}.Sscal(n, beta, y, incY) } else { Implementation{}.Sscal(n, beta, y, -incY) } } if alpha == 0 { return } if n == 1 { y[0] += alpha * a[0] * x[0] return } if ul == blas.Upper { if incX == 1 { iy := ky for i := 0; i < n; i++ { xv := x[i] * alpha sum := x[i] * a[i*lda+i] jy := ky + (i+1)*incY atmp := a[i*lda+i+1 : i*lda+n] for j, v := range atmp { jp := j + i + 1 sum += x[jp] * v y[jy] += xv * v jy += incY } y[iy] += alpha * sum iy += incY } return } ix := kx iy := ky for i := 0; i < n; i++ { xv := x[ix] * alpha sum := x[ix] * a[i*lda+i] jx := kx + (i+1)*incX jy := ky + (i+1)*incY atmp := a[i*lda+i+1 : i*lda+n] for _, v := range atmp { sum += x[jx] * v y[jy] += xv * v jx += incX jy += incY } y[iy] += alpha * sum ix += incX iy += incY } return } // Cases where a is lower triangular. if incX == 1 { iy := ky for i := 0; i < n; i++ { jy := ky xv := alpha * x[i] atmp := a[i*lda : i*lda+i] var sum float32 for j, v := range atmp { sum += x[j] * v y[jy] += xv * v jy += incY } sum += x[i] * a[i*lda+i] sum *= alpha y[iy] += sum iy += incY } return } ix := kx iy := ky for i := 0; i < n; i++ { jx := kx jy := ky xv := alpha * x[ix] atmp := a[i*lda : i*lda+i] var sum float32 for _, v := range atmp { sum += x[jx] * v y[jy] += xv * v jx += incX jy += incY } sum += x[ix] * a[i*lda+i] sum *= alpha y[iy] += sum ix += incX iy += incY } } // Stbmv computes // x = A * x if tA == blas.NoTrans // x = A^T * x if tA == blas.Trans or blas.ConjTrans // where A is an n×n triangular banded matrix with k diagonals, and x is a vector. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if d != blas.NonUnit && d != blas.Unit { panic(badDiag) } if n < 0 { panic(nLT0) } if k < 0 { panic(kLT0) } if lda*(n-1)+k+1 > len(a) || lda < k+1 { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if n == 0 { return } var kx int if incX <= 0 { kx = -(n - 1) * incX } else if incX != 1 { kx = 0 } nonunit := d != blas.Unit if tA == blas.NoTrans { if ul == blas.Upper { if incX == 1 { for i := 0; i < n; i++ { u := min(1+k, n-i) var sum float32 atmp := a[i*lda:] xtmp := x[i:] for j := 1; j < u; j++ { sum += xtmp[j] * atmp[j] } if nonunit { sum += xtmp[0] * atmp[0] } else { sum += xtmp[0] } x[i] = sum } return } ix := kx for i := 0; i < n; i++ { u := min(1+k, n-i) var sum float32 atmp := a[i*lda:] jx := incX for j := 1; j < u; j++ { sum += x[ix+jx] * atmp[j] jx += incX } if nonunit { sum += x[ix] * atmp[0] } else { sum += x[ix] } x[ix] = sum ix += incX } return } if incX == 1 { for i := n - 1; i >= 0; i-- { l := max(0, k-i) atmp := a[i*lda:] var sum float32 for j := l; j < k; j++ { sum += x[i-k+j] * atmp[j] } if nonunit { sum += x[i] * atmp[k] } else { sum += x[i] } x[i] = sum } return } ix := kx + (n-1)*incX for i := n - 1; i >= 0; i-- { l := max(0, k-i) atmp := a[i*lda:] var sum float32 jx := l * incX for j := l; j < k; j++ { sum += x[ix-k*incX+jx] * atmp[j] jx += incX } if nonunit { sum += x[ix] * atmp[k] } else { sum += x[ix] } x[ix] = sum ix -= incX } return } if ul == blas.Upper { if incX == 1 { for i := n - 1; i >= 0; i-- { u := k + 1 if i < u { u = i + 1 } var sum float32 for j := 1; j < u; j++ { sum += x[i-j] * a[(i-j)*lda+j] } if nonunit { sum += x[i] * a[i*lda] } else { sum += x[i] } x[i] = sum } return } ix := kx + (n-1)*incX for i := n - 1; i >= 0; i-- { u := k + 1 if i < u { u = i + 1 } var sum float32 jx := incX for j := 1; j < u; j++ { sum += x[ix-jx] * a[(i-j)*lda+j] jx += incX } if nonunit { sum += x[ix] * a[i*lda] } else { sum += x[ix] } x[ix] = sum ix -= incX } return } if incX == 1 { for i := 0; i < n; i++ { u := k if i+k >= n { u = n - i - 1 } var sum float32 for j := 0; j < u; j++ { sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] } if nonunit { sum += x[i] * a[i*lda+k] } else { sum += x[i] } x[i] = sum } return } ix := kx for i := 0; i < n; i++ { u := k if i+k >= n { u = n - i - 1 } var ( sum float32 jx int ) for j := 0; j < u; j++ { sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] jx += incX } if nonunit { sum += x[ix] * a[i*lda+k] } else { sum += x[ix] } x[ix] = sum ix += incX } } // Stpmv computes // x = A * x if tA == blas.NoTrans // x = A^T * x if tA == blas.Trans or blas.ConjTrans // where A is an n×n unit triangular matrix in packed format, and x is a vector. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { // Verify inputs if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if d != blas.NonUnit && d != blas.Unit { panic(badDiag) } if n < 0 { panic(nLT0) } if len(ap) < (n*(n+1))/2 { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if n == 0 { return } var kx int if incX <= 0 { kx = -(n - 1) * incX } nonUnit := d == blas.NonUnit var offset int // Offset is the index of (i,i) if tA == blas.NoTrans { if ul == blas.Upper { if incX == 1 { for i := 0; i < n; i++ { xi := x[i] if nonUnit { xi *= ap[offset] } atmp := ap[offset+1 : offset+n-i] xtmp := x[i+1:] for j, v := range atmp { xi += v * xtmp[j] } x[i] = xi offset += n - i } return } ix := kx for i := 0; i < n; i++ { xix := x[ix] if nonUnit { xix *= ap[offset] } atmp := ap[offset+1 : offset+n-i] jx := kx + (i+1)*incX for _, v := range atmp { xix += v * x[jx] jx += incX } x[ix] = xix offset += n - i ix += incX } return } if incX == 1 { offset = n*(n+1)/2 - 1 for i := n - 1; i >= 0; i-- { xi := x[i] if nonUnit { xi *= ap[offset] } atmp := ap[offset-i : offset] for j, v := range atmp { xi += v * x[j] } x[i] = xi offset -= i + 1 } return } ix := kx + (n-1)*incX offset = n*(n+1)/2 - 1 for i := n - 1; i >= 0; i-- { xix := x[ix] if nonUnit { xix *= ap[offset] } atmp := ap[offset-i : offset] jx := kx for _, v := range atmp { xix += v * x[jx] jx += incX } x[ix] = xix offset -= i + 1 ix -= incX } return } // Cases where ap is transposed. if ul == blas.Upper { if incX == 1 { offset = n*(n+1)/2 - 1 for i := n - 1; i >= 0; i-- { xi := x[i] atmp := ap[offset+1 : offset+n-i] xtmp := x[i+1:] for j, v := range atmp { xtmp[j] += v * xi } if nonUnit { x[i] *= ap[offset] } offset -= n - i + 1 } return } ix := kx + (n-1)*incX offset = n*(n+1)/2 - 1 for i := n - 1; i >= 0; i-- { xix := x[ix] jx := kx + (i+1)*incX atmp := ap[offset+1 : offset+n-i] for _, v := range atmp { x[jx] += v * xix jx += incX } if nonUnit { x[ix] *= ap[offset] } offset -= n - i + 1 ix -= incX } return } if incX == 1 { for i := 0; i < n; i++ { xi := x[i] atmp := ap[offset-i : offset] for j, v := range atmp { x[j] += v * xi } if nonUnit { x[i] *= ap[offset] } offset += i + 2 } return } ix := kx for i := 0; i < n; i++ { xix := x[ix] jx := kx atmp := ap[offset-i : offset] for _, v := range atmp { x[jx] += v * xix jx += incX } if nonUnit { x[ix] *= ap[offset] } ix += incX offset += i + 2 } } // Stbsv solves // A * x = b // where A is an n×n triangular banded matrix with k diagonals in packed format, // and x is a vector. // At entry to the function, x contains the values of b, and the result is // stored in place into x. // // No test for singularity or near-singularity is included in this // routine. Such tests must be performed before calling this routine. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if d != blas.NonUnit && d != blas.Unit { panic(badDiag) } if n < 0 { panic(nLT0) } if lda*(n-1)+k+1 > len(a) || lda < k+1 { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if n == 0 { return } var kx int if incX < 0 { kx = -(n - 1) * incX } else { kx = 0 } nonUnit := d == blas.NonUnit // Form x = A^-1 x. // Several cases below use subslices for speed improvement. // The incX != 1 cases usually do not because incX may be negative. if tA == blas.NoTrans { if ul == blas.Upper { if incX == 1 { for i := n - 1; i >= 0; i-- { bands := k if i+bands >= n { bands = n - i - 1 } atmp := a[i*lda+1:] xtmp := x[i+1 : i+bands+1] var sum float32 for j, v := range xtmp { sum += v * atmp[j] } x[i] -= sum if nonUnit { x[i] /= a[i*lda] } } return } ix := kx + (n-1)*incX for i := n - 1; i >= 0; i-- { max := k + 1 if i+max > n { max = n - i } atmp := a[i*lda:] var ( jx int sum float32 ) for j := 1; j < max; j++ { jx += incX sum += x[ix+jx] * atmp[j] } x[ix] -= sum if nonUnit { x[ix] /= atmp[0] } ix -= incX } return } if incX == 1 { for i := 0; i < n; i++ { bands := k if i-k < 0 { bands = i } atmp := a[i*lda+k-bands:] xtmp := x[i-bands : i] var sum float32 for j, v := range xtmp { sum += v * atmp[j] } x[i] -= sum if nonUnit { x[i] /= atmp[bands] } } return } ix := kx for i := 0; i < n; i++ { bands := k if i-k < 0 { bands = i } atmp := a[i*lda+k-bands:] var ( sum float32 jx int ) for j := 0; j < bands; j++ { sum += x[ix-bands*incX+jx] * atmp[j] jx += incX } x[ix] -= sum if nonUnit { x[ix] /= atmp[bands] } ix += incX } return } // Cases where a is transposed. if ul == blas.Upper { if incX == 1 { for i := 0; i < n; i++ { bands := k if i-k < 0 { bands = i } var sum float32 for j := 0; j < bands; j++ { sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] } x[i] -= sum if nonUnit { x[i] /= a[i*lda] } } return } ix := kx for i := 0; i < n; i++ { bands := k if i-k < 0 { bands = i } var ( sum float32 jx int ) for j := 0; j < bands; j++ { sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] jx += incX } x[ix] -= sum if nonUnit { x[ix] /= a[i*lda] } ix += incX } return } if incX == 1 { for i := n - 1; i >= 0; i-- { bands := k if i+bands >= n { bands = n - i - 1 } var sum float32 xtmp := x[i+1 : i+1+bands] for j, v := range xtmp { sum += v * a[(i+j+1)*lda+k-j-1] } x[i] -= sum if nonUnit { x[i] /= a[i*lda+k] } } return } ix := kx + (n-1)*incX for i := n - 1; i >= 0; i-- { bands := k if i+bands >= n { bands = n - i - 1 } var ( sum float32 jx int ) for j := 0; j < bands; j++ { sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] jx += incX } x[ix] -= sum if nonUnit { x[ix] /= a[i*lda+k] } ix -= incX } } // Ssbmv performs // y = alpha * A * x + beta * y // where A is an n×n symmetric banded matrix, x and y are vectors, and alpha // and beta are scalars. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if n < 0 { panic(nLT0) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { panic(badY) } if lda*(n-1)+k+1 > len(a) || lda < k+1 { panic(badLdA) } // Quick return if possible if n == 0 || (alpha == 0 && beta == 1) { return } // Set up indexes lenX := n lenY := n var kx, ky int if incX > 0 { kx = 0 } else { kx = -(lenX - 1) * incX } if incY > 0 { ky = 0 } else { ky = -(lenY - 1) * incY } // First form y := beta * y if incY > 0 { Implementation{}.Sscal(lenY, beta, y, incY) } else { Implementation{}.Sscal(lenY, beta, y, -incY) } if alpha == 0 { return } if ul == blas.Upper { if incX == 1 { iy := ky for i := 0; i < n; i++ { atmp := a[i*lda:] tmp := alpha * x[i] sum := tmp * atmp[0] u := min(k, n-i-1) jy := incY for j := 1; j <= u; j++ { v := atmp[j] sum += alpha * x[i+j] * v y[iy+jy] += tmp * v jy += incY } y[iy] += sum iy += incY } return } ix := kx iy := ky for i := 0; i < n; i++ { atmp := a[i*lda:] tmp := alpha * x[ix] sum := tmp * atmp[0] u := min(k, n-i-1) jx := incX jy := incY for j := 1; j <= u; j++ { v := atmp[j] sum += alpha * x[ix+jx] * v y[iy+jy] += tmp * v jx += incX jy += incY } y[iy] += sum ix += incX iy += incY } return } // Casses where a has bands below the diagonal. if incX == 1 { iy := ky for i := 0; i < n; i++ { l := max(0, k-i) tmp := alpha * x[i] jy := l * incY atmp := a[i*lda:] for j := l; j < k; j++ { v := atmp[j] y[iy] += alpha * v * x[i-k+j] y[iy-k*incY+jy] += tmp * v jy += incY } y[iy] += tmp * atmp[k] iy += incY } return } ix := kx iy := ky for i := 0; i < n; i++ { l := max(0, k-i) tmp := alpha * x[ix] jx := l * incX jy := l * incY atmp := a[i*lda:] for j := l; j < k; j++ { v := atmp[j] y[iy] += alpha * v * x[ix-k*incX+jx] y[iy-k*incY+jy] += tmp * v jx += incX jy += incY } y[iy] += tmp * atmp[k] ix += incX iy += incY } return } // Ssyr performs the rank-one update // a += alpha * x * x^T // where a is an n×n symmetric matrix, and x is a vector. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) { if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if n < 0 { panic(nLT0) } if incX == 0 { panic(zeroIncX) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if lda*(n-1)+n > len(a) || lda < max(1, n) { panic(badLdA) } if alpha == 0 || n == 0 { return } lenX := n var kx int if incX > 0 { kx = 0 } else { kx = -(lenX - 1) * incX } if ul == blas.Upper { if incX == 1 { for i := 0; i < n; i++ { tmp := x[i] * alpha if tmp != 0 { atmp := a[i*lda+i : i*lda+n] xtmp := x[i:n] for j, v := range xtmp { atmp[j] += v * tmp } } } return } ix := kx for i := 0; i < n; i++ { tmp := x[ix] * alpha if tmp != 0 { jx := ix atmp := a[i*lda:] for j := i; j < n; j++ { atmp[j] += x[jx] * tmp jx += incX } } ix += incX } return } // Cases where a is lower triangular. if incX == 1 { for i := 0; i < n; i++ { tmp := x[i] * alpha if tmp != 0 { atmp := a[i*lda:] xtmp := x[:i+1] for j, v := range xtmp { atmp[j] += tmp * v } } } return } ix := kx for i := 0; i < n; i++ { tmp := x[ix] * alpha if tmp != 0 { atmp := a[i*lda:] jx := kx for j := 0; j < i+1; j++ { atmp[j] += tmp * x[jx] jx += incX } } ix += incX } } // Ssyr2 performs the symmetric rank-two update // A += alpha * x * y^T + alpha * y * x^T // where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if n < 0 { panic(nLT0) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { panic(badY) } if lda*(n-1)+n > len(a) || lda < max(1, n) { panic(badLdA) } if alpha == 0 { return } var ky, kx int if incY > 0 { ky = 0 } else { ky = -(n - 1) * incY } if incX > 0 { kx = 0 } else { kx = -(n - 1) * incX } if ul == blas.Upper { if incX == 1 && incY == 1 { for i := 0; i < n; i++ { xi := x[i] yi := y[i] atmp := a[i*lda:] for j := i; j < n; j++ { atmp[j] += alpha * (xi*y[j] + x[j]*yi) } } return } ix := kx iy := ky for i := 0; i < n; i++ { jx := kx + i*incX jy := ky + i*incY xi := x[ix] yi := y[iy] atmp := a[i*lda:] for j := i; j < n; j++ { atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) jx += incX jy += incY } ix += incX iy += incY } return } if incX == 1 && incY == 1 { for i := 0; i < n; i++ { xi := x[i] yi := y[i] atmp := a[i*lda:] for j := 0; j <= i; j++ { atmp[j] += alpha * (xi*y[j] + x[j]*yi) } } return } ix := kx iy := ky for i := 0; i < n; i++ { jx := kx jy := ky xi := x[ix] yi := y[iy] atmp := a[i*lda:] for j := 0; j <= i; j++ { atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) jx += incX jy += incY } ix += incX iy += incY } return } // Stpsv solves // A * x = b if tA == blas.NoTrans // A^T * x = b if tA == blas.Trans or blas.ConjTrans // where A is an n×n triangular matrix in packed format and x is a vector. // At entry to the function, x contains the values of b, and the result is // stored in place into x. // // No test for singularity or near-singularity is included in this // routine. Such tests must be performed before calling this routine. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { // Verify inputs if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if d != blas.NonUnit && d != blas.Unit { panic(badDiag) } if n < 0 { panic(nLT0) } if len(ap) < (n*(n+1))/2 { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if n == 0 { return } var kx int if incX <= 0 { kx = -(n - 1) * incX } nonUnit := d == blas.NonUnit var offset int // Offset is the index of (i,i) if tA == blas.NoTrans { if ul == blas.Upper { offset = n*(n+1)/2 - 1 if incX == 1 { for i := n - 1; i >= 0; i-- { atmp := ap[offset+1 : offset+n-i] xtmp := x[i+1:] var sum float32 for j, v := range atmp { sum += v * xtmp[j] } x[i] -= sum if nonUnit { x[i] /= ap[offset] } offset -= n - i + 1 } return } ix := kx + (n-1)*incX for i := n - 1; i >= 0; i-- { atmp := ap[offset+1 : offset+n-i] jx := kx + (i+1)*incX var sum float32 for _, v := range atmp { sum += v * x[jx] jx += incX } x[ix] -= sum if nonUnit { x[ix] /= ap[offset] } ix -= incX offset -= n - i + 1 } return } if incX == 1 { for i := 0; i < n; i++ { atmp := ap[offset-i : offset] var sum float32 for j, v := range atmp { sum += v * x[j] } x[i] -= sum if nonUnit { x[i] /= ap[offset] } offset += i + 2 } return } ix := kx for i := 0; i < n; i++ { jx := kx atmp := ap[offset-i : offset] var sum float32 for _, v := range atmp { sum += v * x[jx] jx += incX } x[ix] -= sum if nonUnit { x[ix] /= ap[offset] } ix += incX offset += i + 2 } return } // Cases where ap is transposed. if ul == blas.Upper { if incX == 1 { for i := 0; i < n; i++ { if nonUnit { x[i] /= ap[offset] } xi := x[i] atmp := ap[offset+1 : offset+n-i] xtmp := x[i+1:] for j, v := range atmp { xtmp[j] -= v * xi } offset += n - i } return } ix := kx for i := 0; i < n; i++ { if nonUnit { x[ix] /= ap[offset] } xix := x[ix] atmp := ap[offset+1 : offset+n-i] jx := kx + (i+1)*incX for _, v := range atmp { x[jx] -= v * xix jx += incX } ix += incX offset += n - i } return } if incX == 1 { offset = n*(n+1)/2 - 1 for i := n - 1; i >= 0; i-- { if nonUnit { x[i] /= ap[offset] } xi := x[i] atmp := ap[offset-i : offset] for j, v := range atmp { x[j] -= v * xi } offset -= i + 1 } return } ix := kx + (n-1)*incX offset = n*(n+1)/2 - 1 for i := n - 1; i >= 0; i-- { if nonUnit { x[ix] /= ap[offset] } xix := x[ix] atmp := ap[offset-i : offset] jx := kx for _, v := range atmp { x[jx] -= v * xix jx += incX } ix -= incX offset -= i + 1 } } // Sspmv performs // y = alpha * A * x + beta * y, // where A is an n×n symmetric matrix in packed format, x and y are vectors // and alpha and beta are scalars. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, a []float32, x []float32, incX int, beta float32, y []float32, incY int) { // Verify inputs if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if n < 0 { panic(nLT0) } if len(a) < (n*(n+1))/2 { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { panic(badY) } // Quick return if possible if n == 0 || (alpha == 0 && beta == 1) { return } // Set up start points var kx, ky int if incX > 0 { kx = 0 } else { kx = -(n - 1) * incX } if incY > 0 { ky = 0 } else { ky = -(n - 1) * incY } // Form y = beta * y if beta != 1 { if incY > 0 { Implementation{}.Sscal(n, beta, y, incY) } else { Implementation{}.Sscal(n, beta, y, -incY) } } if alpha == 0 { return } if n == 1 { y[0] += alpha * a[0] * x[0] return } var offset int // Offset is the index of (i,i). if ul == blas.Upper { if incX == 1 { iy := ky for i := 0; i < n; i++ { xv := x[i] * alpha sum := a[offset] * x[i] atmp := a[offset+1 : offset+n-i] xtmp := x[i+1:] jy := ky + (i+1)*incY for j, v := range atmp { sum += v * xtmp[j] y[jy] += v * xv jy += incY } y[iy] += alpha * sum iy += incY offset += n - i } return } ix := kx iy := ky for i := 0; i < n; i++ { xv := x[ix] * alpha sum := a[offset] * x[ix] atmp := a[offset+1 : offset+n-i] jx := kx + (i+1)*incX jy := ky + (i+1)*incY for _, v := range atmp { sum += v * x[jx] y[jy] += v * xv jx += incX jy += incY } y[iy] += alpha * sum ix += incX iy += incY offset += n - i } return } if incX == 1 { iy := ky for i := 0; i < n; i++ { xv := x[i] * alpha atmp := a[offset-i : offset] jy := ky var sum float32 for j, v := range atmp { sum += v * x[j] y[jy] += v * xv jy += incY } sum += a[offset] * x[i] y[iy] += alpha * sum iy += incY offset += i + 2 } return } ix := kx iy := ky for i := 0; i < n; i++ { xv := x[ix] * alpha atmp := a[offset-i : offset] jx := kx jy := ky var sum float32 for _, v := range atmp { sum += v * x[jx] y[jy] += v * xv jx += incX jy += incY } sum += a[offset] * x[ix] y[iy] += alpha * sum ix += incX iy += incY offset += i + 2 } } // Sspr computes the rank-one operation // a += alpha * x * x^T // where a is an n×n symmetric matrix in packed format, x is a vector, and // alpha is a scalar. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32) { if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if n < 0 { panic(nLT0) } if incX == 0 { panic(zeroIncX) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if len(a) < (n*(n+1))/2 { panic(badLdA) } if alpha == 0 || n == 0 { return } lenX := n var kx int if incX > 0 { kx = 0 } else { kx = -(lenX - 1) * incX } var offset int // Offset is the index of (i,i). if ul == blas.Upper { if incX == 1 { for i := 0; i < n; i++ { atmp := a[offset:] xv := alpha * x[i] xtmp := x[i:n] for j, v := range xtmp { atmp[j] += xv * v } offset += n - i } return } ix := kx for i := 0; i < n; i++ { jx := kx + i*incX atmp := a[offset:] xv := alpha * x[ix] for j := 0; j < n-i; j++ { atmp[j] += xv * x[jx] jx += incX } ix += incX offset += n - i } return } if incX == 1 { for i := 0; i < n; i++ { atmp := a[offset-i:] xv := alpha * x[i] xtmp := x[:i+1] for j, v := range xtmp { atmp[j] += xv * v } offset += i + 2 } return } ix := kx for i := 0; i < n; i++ { jx := kx atmp := a[offset-i:] xv := alpha * x[ix] for j := 0; j <= i; j++ { atmp[j] += xv * x[jx] jx += incX } ix += incX offset += i + 2 } } // Sspr2 performs the symmetric rank-2 update // A += alpha * x * y^T + alpha * y * x^T, // where A is an n×n symmetric matrix in packed format, x and y are vectors, // and alpha is a scalar. // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) { if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if n < 0 { panic(nLT0) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { panic(badY) } if len(ap) < (n*(n+1))/2 { panic(badLdA) } if alpha == 0 { return } var ky, kx int if incY > 0 { ky = 0 } else { ky = -(n - 1) * incY } if incX > 0 { kx = 0 } else { kx = -(n - 1) * incX } var offset int // Offset is the index of (i,i). if ul == blas.Upper { if incX == 1 && incY == 1 { for i := 0; i < n; i++ { atmp := ap[offset:] xi := x[i] yi := y[i] xtmp := x[i:n] ytmp := y[i:n] for j, v := range xtmp { atmp[j] += alpha * (xi*ytmp[j] + v*yi) } offset += n - i } return } ix := kx iy := ky for i := 0; i < n; i++ { jx := kx + i*incX jy := ky + i*incY atmp := ap[offset:] xi := x[ix] yi := y[iy] for j := 0; j < n-i; j++ { atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) jx += incX jy += incY } ix += incX iy += incY offset += n - i } return } if incX == 1 && incY == 1 { for i := 0; i < n; i++ { atmp := ap[offset-i:] xi := x[i] yi := y[i] xtmp := x[:i+1] for j, v := range xtmp { atmp[j] += alpha * (xi*y[j] + v*yi) } offset += i + 2 } return } ix := kx iy := ky for i := 0; i < n; i++ { jx := kx jy := ky atmp := ap[offset-i:] for j := 0; j <= i; j++ { atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) jx += incX jy += incY } ix += incX iy += incY offset += i + 2 } }