blas/gonum: rewrite drotmg using Hopkins' implementation

Also fix blas.Flag documentation and explicitly state values.
This commit is contained in:
Dan Kortschak
2018-03-01 04:27:55 +10:30
committed by GitHub
parent 5e47fe3bf1
commit 58e39c9afd
4 changed files with 325 additions and 205 deletions

View File

@@ -10,10 +10,10 @@ package blas
type Flag int type Flag int
const ( const (
Identity Flag = iota - 2 // H is the identity matrix; no rotation is needed. Identity Flag = -2 // H is the identity matrix; no rotation is needed.
Rescaling // H specifies rescaling. Rescaling Flag = -1 // H specifies rescaling.
OffDiagonal // Off-diagonal elements of H are units. OffDiagonal Flag = 0 // Off-diagonal elements of H are non-unit.
Diagonal // Diagonal elements of H are units. Diagonal Flag = 1 // Diagonal elements of H are non-unit.
) )
// SrotmParams contains Givens transformation parameters returned // SrotmParams contains Givens transformation parameters returned

View File

@@ -326,122 +326,108 @@ func (Implementation) Drotg(a, b float64) (c, s, r, z float64) {
// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html // http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html
// for more details. // for more details.
func (Implementation) Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) { func (Implementation) Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) {
var p1, p2, q1, q2, u float64 // The implementation of Drotmg used here is taken from Hopkins 1997
// Appendix A: https://doi.org/10.1145/289251.289253
// with the exception of the gam constants below.
const ( const (
gam = 4096.0 gam = 4096.0
gamsq = 16777216.0 gamsq = gam * gam
rgamsq = 5.9604645e-8 rgamsq = 1.0 / gamsq
) )
if d1 < 0 { if d1 < 0 {
p.Flag = blas.Rescaling p.Flag = blas.Rescaling // Error state.
return return p, 0, 0, 0
} }
p2 = d2 * y1 var h11, h12, h21, h22 float64
if p2 == 0 { if d2 == 0 || y1 == 0 {
p.Flag = blas.Identity p.Flag = blas.Identity
rd1 = d1 return p, d1, d2, x1
rd2 = d2 } else if (d1 == 0 || x1 == 0) && d2 > 0 {
rx1 = x1
return
}
p1 = d1 * x1
q2 = p2 * y1
q1 = p1 * x1
absQ1 := math.Abs(q1)
absQ2 := math.Abs(q2)
if absQ1 < absQ2 && q2 < 0 {
p.Flag = blas.Rescaling
return
}
if d1 == 0 {
p.Flag = blas.Diagonal p.Flag = blas.Diagonal
p.H[0] = p1 / p2 h12 = 1
p.H[3] = x1 / y1 h21 = -1
u = 1 + p.H[0]*p.H[3] x1 = y1
rd1, rd2 = d2/u, d1/u d1, d2 = d2, d1
rx1 = y1 / u } else {
return p2 := d2 * y1
} p1 := d1 * x1
q2 := p2 * y1
// Now we know that d1 != 0, and d2 != 0. If d2 == 0, it would be caught q1 := p1 * x1
// when p2 == 0, and if d1 == 0, then it is caught above if math.Abs(q1) > math.Abs(q2) {
if absQ1 > absQ2 {
p.H[1] = -y1 / x1
p.H[2] = p2 / p1
u = 1 - p.H[2]*p.H[1]
rd1 = d1
rd2 = d2
rx1 = x1
p.Flag = blas.OffDiagonal p.Flag = blas.OffDiagonal
// u must be greater than zero because |q1| > |q2|, so check from netlib h11 = 1
// is unnecessary h22 = 1
// This is left in for ease of comparison with complex routines h21 = -y1 / x1
//if u > 0 { h12 = p2 / p1
rd1 /= u u := 1 - h12*h21
rd2 /= u if u <= 0 {
rx1 *= u p.Flag = blas.Rescaling // Error state.
//} return p, 0, 0, 0
} else {
d1 /= u
d2 /= u
x1 *= u
}
} else {
if q2 < 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
} else { } else {
p.Flag = blas.Diagonal p.Flag = blas.Diagonal
p.H[0] = p1 / p2 h21 = -1
p.H[3] = x1 / y1 h12 = 1
u = 1 + p.H[0]*p.H[3] h11 = p1 / p2
rd1 = d2 / u h22 = x1 / y1
rd2 = d1 / u u := 1 + h11*h22
rx1 = y1 * u d1, d2 = d2/u, d1/u
x1 = y1 * u
} }
for rd1 <= rgamsq || rd1 >= gamsq {
if p.Flag == blas.OffDiagonal {
p.H[0] = 1
p.H[3] = 1
p.Flag = blas.Rescaling
} else if p.Flag == blas.Diagonal {
p.H[1] = -1
p.H[2] = 1
p.Flag = blas.Rescaling
}
if rd1 <= rgamsq {
rd1 *= gam * gam
rx1 /= gam
p.H[0] /= gam
p.H[2] /= gam
} else {
rd1 /= gam * gam
rx1 *= gam
p.H[0] *= gam
p.H[2] *= gam
} }
} }
for math.Abs(rd2) <= rgamsq || math.Abs(rd2) >= gamsq { for d1 <= rgamsq && d1 != 0 {
if p.Flag == blas.OffDiagonal {
p.H[0] = 1
p.H[3] = 1
p.Flag = blas.Rescaling p.Flag = blas.Rescaling
} else if p.Flag == blas.Diagonal { d1 = (d1 * gam) * gam
p.H[1] = -1 x1 /= gam
p.H[2] = 1 h11 /= gam
h12 /= gam
}
for d1 > gamsq {
p.Flag = blas.Rescaling p.Flag = blas.Rescaling
d1 = (d1 / gam) / gam
x1 *= gam
h11 *= gam
h12 *= gam
} }
if math.Abs(rd2) <= rgamsq {
rd2 *= gam * gam for math.Abs(d2) <= rgamsq && d2 != 0 {
p.H[1] /= gam p.Flag = blas.Rescaling
p.H[3] /= gam d2 = (d2 * gam) * gam
} else { h21 /= gam
rd2 /= gam * gam h22 /= gam
p.H[1] *= gam
p.H[3] *= gam
} }
for math.Abs(d2) > gamsq {
p.Flag = blas.Rescaling
d2 = (d2 / gam) / gam
h21 *= gam
h22 *= gam
} }
return
switch p.Flag {
case blas.Diagonal:
p.H = [4]float64{0: h11, 3: h22}
case blas.OffDiagonal:
p.H = [4]float64{1: h21, 2: h12}
case blas.Rescaling:
p.H = [4]float64{h11, h21, h12, h22}
default:
panic("blas: unexpected blas.Flag")
}
return p, d1, d2, x1
} }
// Drot applies a plane transformation. // Drot applies a plane transformation.

View File

@@ -344,122 +344,108 @@ func (Implementation) Srotg(a, b float32) (c, s, r, z float32) {
// //
// Float32 implementations are autogenerated and not directly tested. // Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srotmg(d1, d2, x1, y1 float32) (p blas.SrotmParams, rd1, rd2, rx1 float32) { func (Implementation) Srotmg(d1, d2, x1, y1 float32) (p blas.SrotmParams, rd1, rd2, rx1 float32) {
var p1, p2, q1, q2, u float32 // The implementation of Drotmg used here is taken from Hopkins 1997
// Appendix A: https://doi.org/10.1145/289251.289253
// with the exception of the gam constants below.
const ( const (
gam = 4096.0 gam = 4096.0
gamsq = 16777216.0 gamsq = gam * gam
rgamsq = 5.9604645e-8 rgamsq = 1.0 / gamsq
) )
if d1 < 0 { if d1 < 0 {
p.Flag = blas.Rescaling p.Flag = blas.Rescaling // Error state.
return return p, 0, 0, 0
} }
p2 = d2 * y1 var h11, h12, h21, h22 float32
if p2 == 0 { if d2 == 0 || y1 == 0 {
p.Flag = blas.Identity p.Flag = blas.Identity
rd1 = d1 return p, d1, d2, x1
rd2 = d2 } else if (d1 == 0 || x1 == 0) && d2 > 0 {
rx1 = x1
return
}
p1 = d1 * x1
q2 = p2 * y1
q1 = p1 * x1
absQ1 := math.Abs(q1)
absQ2 := math.Abs(q2)
if absQ1 < absQ2 && q2 < 0 {
p.Flag = blas.Rescaling
return
}
if d1 == 0 {
p.Flag = blas.Diagonal p.Flag = blas.Diagonal
p.H[0] = p1 / p2 h12 = 1
p.H[3] = x1 / y1 h21 = -1
u = 1 + p.H[0]*p.H[3] x1 = y1
rd1, rd2 = d2/u, d1/u d1, d2 = d2, d1
rx1 = y1 / u } else {
return p2 := d2 * y1
} p1 := d1 * x1
q2 := p2 * y1
// Now we know that d1 != 0, and d2 != 0. If d2 == 0, it would be caught q1 := p1 * x1
// when p2 == 0, and if d1 == 0, then it is caught above if math.Abs(q1) > math.Abs(q2) {
if absQ1 > absQ2 {
p.H[1] = -y1 / x1
p.H[2] = p2 / p1
u = 1 - p.H[2]*p.H[1]
rd1 = d1
rd2 = d2
rx1 = x1
p.Flag = blas.OffDiagonal p.Flag = blas.OffDiagonal
// u must be greater than zero because |q1| > |q2|, so check from netlib h11 = 1
// is unnecessary h22 = 1
// This is left in for ease of comparison with complex routines h21 = -y1 / x1
//if u > 0 { h12 = p2 / p1
rd1 /= u u := 1 - h12*h21
rd2 /= u if u <= 0 {
rx1 *= u p.Flag = blas.Rescaling // Error state.
//} return p, 0, 0, 0
} else {
d1 /= u
d2 /= u
x1 *= u
}
} else {
if q2 < 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
} else { } else {
p.Flag = blas.Diagonal p.Flag = blas.Diagonal
p.H[0] = p1 / p2 h21 = -1
p.H[3] = x1 / y1 h12 = 1
u = 1 + p.H[0]*p.H[3] h11 = p1 / p2
rd1 = d2 / u h22 = x1 / y1
rd2 = d1 / u u := 1 + h11*h22
rx1 = y1 * u d1, d2 = d2/u, d1/u
x1 = y1 * u
} }
for rd1 <= rgamsq || rd1 >= gamsq {
if p.Flag == blas.OffDiagonal {
p.H[0] = 1
p.H[3] = 1
p.Flag = blas.Rescaling
} else if p.Flag == blas.Diagonal {
p.H[1] = -1
p.H[2] = 1
p.Flag = blas.Rescaling
}
if rd1 <= rgamsq {
rd1 *= gam * gam
rx1 /= gam
p.H[0] /= gam
p.H[2] /= gam
} else {
rd1 /= gam * gam
rx1 *= gam
p.H[0] *= gam
p.H[2] *= gam
} }
} }
for math.Abs(rd2) <= rgamsq || math.Abs(rd2) >= gamsq { for d1 <= rgamsq && d1 != 0 {
if p.Flag == blas.OffDiagonal {
p.H[0] = 1
p.H[3] = 1
p.Flag = blas.Rescaling p.Flag = blas.Rescaling
} else if p.Flag == blas.Diagonal { d1 = (d1 * gam) * gam
p.H[1] = -1 x1 /= gam
p.H[2] = 1 h11 /= gam
h12 /= gam
}
for d1 > gamsq {
p.Flag = blas.Rescaling p.Flag = blas.Rescaling
d1 = (d1 / gam) / gam
x1 *= gam
h11 *= gam
h12 *= gam
} }
if math.Abs(rd2) <= rgamsq {
rd2 *= gam * gam for math.Abs(d2) <= rgamsq && d2 != 0 {
p.H[1] /= gam p.Flag = blas.Rescaling
p.H[3] /= gam d2 = (d2 * gam) * gam
} else { h21 /= gam
rd2 /= gam * gam h22 /= gam
p.H[1] *= gam
p.H[3] *= gam
} }
for math.Abs(d2) > gamsq {
p.Flag = blas.Rescaling
d2 = (d2 / gam) / gam
h21 *= gam
h22 *= gam
} }
return
switch p.Flag {
case blas.Diagonal:
p.H = [4]float32{0: h11, 3: h22}
case blas.OffDiagonal:
p.H = [4]float32{1: h21, 2: h12}
case blas.Rescaling:
p.H = [4]float32{h11, h21, h12, h22}
default:
panic("blas: unexpected blas.Flag")
}
return p, d1, d2, x1
} }
// Srot applies a plane transformation. // Srot applies a plane transformation.

View File

@@ -1702,7 +1702,7 @@ var DrotmgTests = []DrotmgTestStruct{
Name: "ZeroD1", Name: "ZeroD1",
P: &blas.DrotmParams{ P: &blas.DrotmParams{
Flag: blas.Diagonal, Flag: blas.Diagonal,
H: [4]float64{0, 0, 0, 2}, H: [4]float64{0, 0, 0, 0},
}, },
D1: 0, D1: 0,
D2: 2, D2: 2,
@@ -1940,12 +1940,141 @@ var DrotmgTests = []DrotmgTestStruct{
Rd2: 0.019999999900000003, Rd2: 0.019999999900000003,
Rx1: 1953.125009765625, Rx1: 1953.125009765625,
}, },
{
// Values consistent with the low precision output posted at the OpenBLAS issue.
// See https://github.com/xianyi/OpenBLAS/issues/1452.
Name: "OpenBLAS#1452",
P: &blas.DrotmParams{
Flag: blas.Rescaling,
H: [4]float64{1.6110934624105326e-06, -0.000244140625, 0.000244140625, 1.6276041666666668e-06},
},
D1: 5.9e-8,
D2: 5.960464e-8,
X1: 1,
Y1: 150,
Rd1: 0.9999559282289687,
Rd2: 0.9898121986058326,
Rx1: 0.03662270484346241,
},
{
Name: "netlib/BLAS/TESTING#1",
P: &blas.DrotmParams{
Flag: blas.OffDiagonal,
H: [4]float64{0, -0.16666666666666669, 0.5, 0},
},
D1: 0.10000000000000001,
D2: 0.29999999999999999,
X1: 1.2000000000000000,
Y1: 0.20000000000000001,
Rd1: 9.2307692307692313e-2,
Rd2: 0.27692307692307694,
Rx1: 1.2999999999999998,
},
{
Name: "netlib/BLAS/TESTING#2",
P: &blas.DrotmParams{
Flag: blas.Diagonal,
H: [4]float64{0.5, 0, 0, 0.14285714285714285},
},
D1: 0.69999999999999996,
D2: 0.20000000000000001,
X1: 0.59999999999999998,
Y1: 4.2000000000000002,
Rd1: 0.18666666666666668,
Rd2: 0.65333333333333332,
Rx1: 4.5000000000000000,
},
{
Name: "netlib/BLAS/TESTING#3",
P: &blas.DrotmParams{
Flag: blas.Identity,
H: [4]float64{0, 0, 0, 0},
},
D1: 0,
D2: 0,
X1: 0,
Y1: 0,
Rd1: 0,
Rd2: 0,
Rx1: 0,
},
{
Name: "netlib/BLAS/TESTING#4",
P: &blas.DrotmParams{
Flag: blas.Rescaling,
H: [4]float64{0, 0, 0, 0},
},
D1: 4,
D2: -1,
X1: 2,
Y1: 4,
Rd1: 0,
Rd2: 0,
Rx1: 0,
},
{
Name: "netlib/BLAS/TESTING#5",
P: &blas.DrotmParams{
Flag: blas.Rescaling,
H: [4]float64{0.244140625e-03, -0.1e-3, 0.8138020833333334, 1},
},
D1: 6e-10,
D2: 2e-2,
X1: 100000,
Y1: 10,
Rd1: 7.5497471999999991e-3,
Rd2: 1.4999999999999999e-2,
Rx1: 32.552083333333336,
},
{
Name: "netlib/BLAS/TESTING#6",
P: &blas.DrotmParams{
Flag: blas.Rescaling,
H: [4]float64{4096, -999999.99999999988, 2.0479999999999999e-3, 1},
},
D1: 40000000000,
D2: 2e-2,
X1: 1.0000000000000001e-5,
Y1: 10,
Rd1: 1589.4571940104167,
Rd2: 1.3333333333333334e-2,
Rx1: 6.1440000000000008e-2,
},
{
Name: "netlib/BLAS/TESTING#7",
P: &blas.DrotmParams{
Flag: blas.Rescaling,
H: [4]float64{0.5e-4, -0.2441406250e-3, 1, 2.441406250},
},
D1: 2.0000000000000001e-10,
D2: 4.0000000000000001e-2,
X1: 100000,
Y1: 10,
Rd1: 2.6666666666666668e-2,
Rd2: 2.2369621333333334e-3,
Rx1: 15,
},
{
Name: "netlib/BLAS/TESTING#8",
P: &blas.DrotmParams{
Flag: blas.Rescaling,
H: [4]float64{500000, -4096, 1, 4.096e-3},
},
D1: 20000000000,
D2: 4.0000000000000001e-2,
X1: 1.0000000000000001e-5,
Y1: 10,
Rd1: 2.6666666666666668e-2,
Rd2: 794.72859700520837,
Rx1: 15,
},
// TODO: Add Small, Small, 0 case // TODO: Add Small, Small, 0 case
// TODO: Add Small, Small, 1 case // TODO: Add Small, Small, 1 case
} }
type Drotmger interface { type Drotmger interface {
Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64)
Drotmer
} }
func DrotmgTest(t *testing.T, d Drotmger) { func DrotmgTest(t *testing.T, d Drotmger) {
@@ -1971,6 +2100,25 @@ func DrotmgTest(t *testing.T, d Drotmger) {
if !dTolEqual(rx1, test.Rx1) { if !dTolEqual(rx1, test.Rx1) {
t.Errorf("drotmg rx1 mismatch %v: expected %v, found %v", test.Name, test.Rx1, rx1) t.Errorf("drotmg rx1 mismatch %v: expected %v, found %v", test.Name, test.Rx1, rx1)
} }
// Drotmg routines compute the components of a modified Givens transformation
// matrix H that zeros the y-component of the resulting vector,
//
// [x1; 0] := H[x1 sqrt(d1); y1 sqrt(d2)].
//
// Drotm performs a modified Givens rotation of points in the plane,
//
// [x1; y1] := H[x1; y1].
y := []float64{test.Y1}
d.Drotm(1, []float64{test.X1}, 1, y, 1, p)
for i, v := range y {
if rd2 >= 0 {
v *= math.Sqrt(rd2)
}
if !dTolEqual(v, 0) {
t.Errorf("drotm y_%d mismatch %v: expected 0, found %v", i, test.Name, v)
}
}
} }
} }