From 58e39c9afd8c42b0e47273fb8f5800f638fc6214 Mon Sep 17 00:00:00 2001 From: Dan Kortschak Date: Thu, 1 Mar 2018 04:27:55 +1030 Subject: [PATCH] blas/gonum: rewrite drotmg using Hopkins' implementation Also fix blas.Flag documentation and explicitly state values. --- blas/blas.go | 8 +- blas/gonum/level1double.go | 186 ++++++++++++++++------------------ blas/gonum/level1single.go | 186 ++++++++++++++++------------------ blas/testblas/level1double.go | 150 ++++++++++++++++++++++++++- 4 files changed, 325 insertions(+), 205 deletions(-) diff --git a/blas/blas.go b/blas/blas.go index 43700d02..ec61c456 100644 --- a/blas/blas.go +++ b/blas/blas.go @@ -10,10 +10,10 @@ package blas type Flag int const ( - Identity Flag = iota - 2 // H is the identity matrix; no rotation is needed. - Rescaling // H specifies rescaling. - OffDiagonal // Off-diagonal elements of H are units. - Diagonal // Diagonal elements of H are units. + Identity Flag = -2 // H is the identity matrix; no rotation is needed. + Rescaling Flag = -1 // H specifies rescaling. + OffDiagonal Flag = 0 // Off-diagonal elements of H are non-unit. + Diagonal Flag = 1 // Diagonal elements of H are non-unit. ) // SrotmParams contains Givens transformation parameters returned diff --git a/blas/gonum/level1double.go b/blas/gonum/level1double.go index e7aff030..1b4a76a1 100644 --- a/blas/gonum/level1double.go +++ b/blas/gonum/level1double.go @@ -326,122 +326,108 @@ func (Implementation) Drotg(a, b float64) (c, s, r, z float64) { // http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html // for more details. func (Implementation) Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) { - var p1, p2, q1, q2, u float64 + // The implementation of Drotmg used here is taken from Hopkins 1997 + // Appendix A: https://doi.org/10.1145/289251.289253 + // with the exception of the gam constants below. const ( gam = 4096.0 - gamsq = 16777216.0 - rgamsq = 5.9604645e-8 + gamsq = gam * gam + rgamsq = 1.0 / gamsq ) if d1 < 0 { - p.Flag = blas.Rescaling - return + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 } - p2 = d2 * y1 - if p2 == 0 { + var h11, h12, h21, h22 float64 + if d2 == 0 || y1 == 0 { p.Flag = blas.Identity - rd1 = d1 - rd2 = d2 - rx1 = x1 - return - } - p1 = d1 * x1 - q2 = p2 * y1 - q1 = p1 * x1 - - absQ1 := math.Abs(q1) - absQ2 := math.Abs(q2) - - if absQ1 < absQ2 && q2 < 0 { - p.Flag = blas.Rescaling - return - } - - if d1 == 0 { + return p, d1, d2, x1 + } else if (d1 == 0 || x1 == 0) && d2 > 0 { p.Flag = blas.Diagonal - p.H[0] = p1 / p2 - p.H[3] = x1 / y1 - u = 1 + p.H[0]*p.H[3] - rd1, rd2 = d2/u, d1/u - rx1 = y1 / u - return - } - - // Now we know that d1 != 0, and d2 != 0. If d2 == 0, it would be caught - // when p2 == 0, and if d1 == 0, then it is caught above - - if absQ1 > absQ2 { - p.H[1] = -y1 / x1 - p.H[2] = p2 / p1 - u = 1 - p.H[2]*p.H[1] - rd1 = d1 - rd2 = d2 - rx1 = x1 - p.Flag = blas.OffDiagonal - // u must be greater than zero because |q1| > |q2|, so check from netlib - // is unnecessary - // This is left in for ease of comparison with complex routines - //if u > 0 { - rd1 /= u - rd2 /= u - rx1 *= u - //} + h12 = 1 + h21 = -1 + x1 = y1 + d1, d2 = d2, d1 } else { - p.Flag = blas.Diagonal - p.H[0] = p1 / p2 - p.H[3] = x1 / y1 - u = 1 + p.H[0]*p.H[3] - rd1 = d2 / u - rd2 = d1 / u - rx1 = y1 * u - } - - for rd1 <= rgamsq || rd1 >= gamsq { - if p.Flag == blas.OffDiagonal { - p.H[0] = 1 - p.H[3] = 1 - p.Flag = blas.Rescaling - } else if p.Flag == blas.Diagonal { - p.H[1] = -1 - p.H[2] = 1 - p.Flag = blas.Rescaling - } - if rd1 <= rgamsq { - rd1 *= gam * gam - rx1 /= gam - p.H[0] /= gam - p.H[2] /= gam + p2 := d2 * y1 + p1 := d1 * x1 + q2 := p2 * y1 + q1 := p1 * x1 + if math.Abs(q1) > math.Abs(q2) { + p.Flag = blas.OffDiagonal + h11 = 1 + h22 = 1 + h21 = -y1 / x1 + h12 = p2 / p1 + u := 1 - h12*h21 + if u <= 0 { + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 + } else { + d1 /= u + d2 /= u + x1 *= u + } } else { - rd1 /= gam * gam - rx1 *= gam - p.H[0] *= gam - p.H[2] *= gam + if q2 < 0 { + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 + } else { + p.Flag = blas.Diagonal + h21 = -1 + h12 = 1 + h11 = p1 / p2 + h22 = x1 / y1 + u := 1 + h11*h22 + d1, d2 = d2/u, d1/u + x1 = y1 * u + } } } - for math.Abs(rd2) <= rgamsq || math.Abs(rd2) >= gamsq { - if p.Flag == blas.OffDiagonal { - p.H[0] = 1 - p.H[3] = 1 - p.Flag = blas.Rescaling - } else if p.Flag == blas.Diagonal { - p.H[1] = -1 - p.H[2] = 1 - p.Flag = blas.Rescaling - } - if math.Abs(rd2) <= rgamsq { - rd2 *= gam * gam - p.H[1] /= gam - p.H[3] /= gam - } else { - rd2 /= gam * gam - p.H[1] *= gam - p.H[3] *= gam - } + for d1 <= rgamsq && d1 != 0 { + p.Flag = blas.Rescaling + d1 = (d1 * gam) * gam + x1 /= gam + h11 /= gam + h12 /= gam } - return + for d1 > gamsq { + p.Flag = blas.Rescaling + d1 = (d1 / gam) / gam + x1 *= gam + h11 *= gam + h12 *= gam + } + + for math.Abs(d2) <= rgamsq && d2 != 0 { + p.Flag = blas.Rescaling + d2 = (d2 * gam) * gam + h21 /= gam + h22 /= gam + } + for math.Abs(d2) > gamsq { + p.Flag = blas.Rescaling + d2 = (d2 / gam) / gam + h21 *= gam + h22 *= gam + } + + switch p.Flag { + case blas.Diagonal: + p.H = [4]float64{0: h11, 3: h22} + case blas.OffDiagonal: + p.H = [4]float64{1: h21, 2: h12} + case blas.Rescaling: + p.H = [4]float64{h11, h21, h12, h22} + default: + panic("blas: unexpected blas.Flag") + } + + return p, d1, d2, x1 } // Drot applies a plane transformation. diff --git a/blas/gonum/level1single.go b/blas/gonum/level1single.go index 196d44be..bdf7c27b 100644 --- a/blas/gonum/level1single.go +++ b/blas/gonum/level1single.go @@ -344,122 +344,108 @@ func (Implementation) Srotg(a, b float32) (c, s, r, z float32) { // // Float32 implementations are autogenerated and not directly tested. func (Implementation) Srotmg(d1, d2, x1, y1 float32) (p blas.SrotmParams, rd1, rd2, rx1 float32) { - var p1, p2, q1, q2, u float32 + // The implementation of Drotmg used here is taken from Hopkins 1997 + // Appendix A: https://doi.org/10.1145/289251.289253 + // with the exception of the gam constants below. const ( gam = 4096.0 - gamsq = 16777216.0 - rgamsq = 5.9604645e-8 + gamsq = gam * gam + rgamsq = 1.0 / gamsq ) if d1 < 0 { - p.Flag = blas.Rescaling - return + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 } - p2 = d2 * y1 - if p2 == 0 { + var h11, h12, h21, h22 float32 + if d2 == 0 || y1 == 0 { p.Flag = blas.Identity - rd1 = d1 - rd2 = d2 - rx1 = x1 - return - } - p1 = d1 * x1 - q2 = p2 * y1 - q1 = p1 * x1 - - absQ1 := math.Abs(q1) - absQ2 := math.Abs(q2) - - if absQ1 < absQ2 && q2 < 0 { - p.Flag = blas.Rescaling - return - } - - if d1 == 0 { + return p, d1, d2, x1 + } else if (d1 == 0 || x1 == 0) && d2 > 0 { p.Flag = blas.Diagonal - p.H[0] = p1 / p2 - p.H[3] = x1 / y1 - u = 1 + p.H[0]*p.H[3] - rd1, rd2 = d2/u, d1/u - rx1 = y1 / u - return - } - - // Now we know that d1 != 0, and d2 != 0. If d2 == 0, it would be caught - // when p2 == 0, and if d1 == 0, then it is caught above - - if absQ1 > absQ2 { - p.H[1] = -y1 / x1 - p.H[2] = p2 / p1 - u = 1 - p.H[2]*p.H[1] - rd1 = d1 - rd2 = d2 - rx1 = x1 - p.Flag = blas.OffDiagonal - // u must be greater than zero because |q1| > |q2|, so check from netlib - // is unnecessary - // This is left in for ease of comparison with complex routines - //if u > 0 { - rd1 /= u - rd2 /= u - rx1 *= u - //} + h12 = 1 + h21 = -1 + x1 = y1 + d1, d2 = d2, d1 } else { - p.Flag = blas.Diagonal - p.H[0] = p1 / p2 - p.H[3] = x1 / y1 - u = 1 + p.H[0]*p.H[3] - rd1 = d2 / u - rd2 = d1 / u - rx1 = y1 * u - } - - for rd1 <= rgamsq || rd1 >= gamsq { - if p.Flag == blas.OffDiagonal { - p.H[0] = 1 - p.H[3] = 1 - p.Flag = blas.Rescaling - } else if p.Flag == blas.Diagonal { - p.H[1] = -1 - p.H[2] = 1 - p.Flag = blas.Rescaling - } - if rd1 <= rgamsq { - rd1 *= gam * gam - rx1 /= gam - p.H[0] /= gam - p.H[2] /= gam + p2 := d2 * y1 + p1 := d1 * x1 + q2 := p2 * y1 + q1 := p1 * x1 + if math.Abs(q1) > math.Abs(q2) { + p.Flag = blas.OffDiagonal + h11 = 1 + h22 = 1 + h21 = -y1 / x1 + h12 = p2 / p1 + u := 1 - h12*h21 + if u <= 0 { + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 + } else { + d1 /= u + d2 /= u + x1 *= u + } } else { - rd1 /= gam * gam - rx1 *= gam - p.H[0] *= gam - p.H[2] *= gam + if q2 < 0 { + p.Flag = blas.Rescaling // Error state. + return p, 0, 0, 0 + } else { + p.Flag = blas.Diagonal + h21 = -1 + h12 = 1 + h11 = p1 / p2 + h22 = x1 / y1 + u := 1 + h11*h22 + d1, d2 = d2/u, d1/u + x1 = y1 * u + } } } - for math.Abs(rd2) <= rgamsq || math.Abs(rd2) >= gamsq { - if p.Flag == blas.OffDiagonal { - p.H[0] = 1 - p.H[3] = 1 - p.Flag = blas.Rescaling - } else if p.Flag == blas.Diagonal { - p.H[1] = -1 - p.H[2] = 1 - p.Flag = blas.Rescaling - } - if math.Abs(rd2) <= rgamsq { - rd2 *= gam * gam - p.H[1] /= gam - p.H[3] /= gam - } else { - rd2 /= gam * gam - p.H[1] *= gam - p.H[3] *= gam - } + for d1 <= rgamsq && d1 != 0 { + p.Flag = blas.Rescaling + d1 = (d1 * gam) * gam + x1 /= gam + h11 /= gam + h12 /= gam } - return + for d1 > gamsq { + p.Flag = blas.Rescaling + d1 = (d1 / gam) / gam + x1 *= gam + h11 *= gam + h12 *= gam + } + + for math.Abs(d2) <= rgamsq && d2 != 0 { + p.Flag = blas.Rescaling + d2 = (d2 * gam) * gam + h21 /= gam + h22 /= gam + } + for math.Abs(d2) > gamsq { + p.Flag = blas.Rescaling + d2 = (d2 / gam) / gam + h21 *= gam + h22 *= gam + } + + switch p.Flag { + case blas.Diagonal: + p.H = [4]float32{0: h11, 3: h22} + case blas.OffDiagonal: + p.H = [4]float32{1: h21, 2: h12} + case blas.Rescaling: + p.H = [4]float32{h11, h21, h12, h22} + default: + panic("blas: unexpected blas.Flag") + } + + return p, d1, d2, x1 } // Srot applies a plane transformation. diff --git a/blas/testblas/level1double.go b/blas/testblas/level1double.go index b73af460..e56e554b 100644 --- a/blas/testblas/level1double.go +++ b/blas/testblas/level1double.go @@ -1702,7 +1702,7 @@ var DrotmgTests = []DrotmgTestStruct{ Name: "ZeroD1", P: &blas.DrotmParams{ Flag: blas.Diagonal, - H: [4]float64{0, 0, 0, 2}, + H: [4]float64{0, 0, 0, 0}, }, D1: 0, D2: 2, @@ -1940,12 +1940,141 @@ var DrotmgTests = []DrotmgTestStruct{ Rd2: 0.019999999900000003, Rx1: 1953.125009765625, }, + { + // Values consistent with the low precision output posted at the OpenBLAS issue. + // See https://github.com/xianyi/OpenBLAS/issues/1452. + Name: "OpenBLAS#1452", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{1.6110934624105326e-06, -0.000244140625, 0.000244140625, 1.6276041666666668e-06}, + }, + D1: 5.9e-8, + D2: 5.960464e-8, + X1: 1, + Y1: 150, + Rd1: 0.9999559282289687, + Rd2: 0.9898121986058326, + Rx1: 0.03662270484346241, + }, + { + Name: "netlib/BLAS/TESTING#1", + P: &blas.DrotmParams{ + Flag: blas.OffDiagonal, + H: [4]float64{0, -0.16666666666666669, 0.5, 0}, + }, + D1: 0.10000000000000001, + D2: 0.29999999999999999, + X1: 1.2000000000000000, + Y1: 0.20000000000000001, + Rd1: 9.2307692307692313e-2, + Rd2: 0.27692307692307694, + Rx1: 1.2999999999999998, + }, + { + Name: "netlib/BLAS/TESTING#2", + P: &blas.DrotmParams{ + Flag: blas.Diagonal, + H: [4]float64{0.5, 0, 0, 0.14285714285714285}, + }, + D1: 0.69999999999999996, + D2: 0.20000000000000001, + X1: 0.59999999999999998, + Y1: 4.2000000000000002, + Rd1: 0.18666666666666668, + Rd2: 0.65333333333333332, + Rx1: 4.5000000000000000, + }, + { + Name: "netlib/BLAS/TESTING#3", + P: &blas.DrotmParams{ + Flag: blas.Identity, + H: [4]float64{0, 0, 0, 0}, + }, + D1: 0, + D2: 0, + X1: 0, + Y1: 0, + Rd1: 0, + Rd2: 0, + Rx1: 0, + }, + { + Name: "netlib/BLAS/TESTING#4", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0, 0, 0, 0}, + }, + D1: 4, + D2: -1, + X1: 2, + Y1: 4, + Rd1: 0, + Rd2: 0, + Rx1: 0, + }, + { + Name: "netlib/BLAS/TESTING#5", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.244140625e-03, -0.1e-3, 0.8138020833333334, 1}, + }, + D1: 6e-10, + D2: 2e-2, + X1: 100000, + Y1: 10, + Rd1: 7.5497471999999991e-3, + Rd2: 1.4999999999999999e-2, + Rx1: 32.552083333333336, + }, + { + Name: "netlib/BLAS/TESTING#6", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{4096, -999999.99999999988, 2.0479999999999999e-3, 1}, + }, + D1: 40000000000, + D2: 2e-2, + X1: 1.0000000000000001e-5, + Y1: 10, + Rd1: 1589.4571940104167, + Rd2: 1.3333333333333334e-2, + Rx1: 6.1440000000000008e-2, + }, + { + Name: "netlib/BLAS/TESTING#7", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{0.5e-4, -0.2441406250e-3, 1, 2.441406250}, + }, + D1: 2.0000000000000001e-10, + D2: 4.0000000000000001e-2, + X1: 100000, + Y1: 10, + Rd1: 2.6666666666666668e-2, + Rd2: 2.2369621333333334e-3, + Rx1: 15, + }, + { + Name: "netlib/BLAS/TESTING#8", + P: &blas.DrotmParams{ + Flag: blas.Rescaling, + H: [4]float64{500000, -4096, 1, 4.096e-3}, + }, + D1: 20000000000, + D2: 4.0000000000000001e-2, + X1: 1.0000000000000001e-5, + Y1: 10, + Rd1: 2.6666666666666668e-2, + Rd2: 794.72859700520837, + Rx1: 15, + }, // TODO: Add Small, Small, 0 case // TODO: Add Small, Small, 1 case } type Drotmger interface { Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) + Drotmer } func DrotmgTest(t *testing.T, d Drotmger) { @@ -1971,6 +2100,25 @@ func DrotmgTest(t *testing.T, d Drotmger) { if !dTolEqual(rx1, test.Rx1) { t.Errorf("drotmg rx1 mismatch %v: expected %v, found %v", test.Name, test.Rx1, rx1) } + + // Drotmg routines compute the components of a modified Givens transformation + // matrix H that zeros the y-component of the resulting vector, + // + // [x1; 0] := H[x1 sqrt(d1); y1 sqrt(d2)]. + // + // Drotm performs a modified Givens rotation of points in the plane, + // + // [x1; y1] := H[x1; y1]. + y := []float64{test.Y1} + d.Drotm(1, []float64{test.X1}, 1, y, 1, p) + for i, v := range y { + if rd2 >= 0 { + v *= math.Sqrt(rd2) + } + if !dTolEqual(v, 0) { + t.Errorf("drotm y_%d mismatch %v: expected 0, found %v", i, test.Name, v) + } + } } }