diff --git a/lapack.go b/lapack.go
index 1cb72475..f6fbb511 100644
--- a/lapack.go
+++ b/lapack.go
@@ -10,23 +10,51 @@ const None = 'N'
 
 type Job byte
 
-const (
-	All       (Job) = 'A'
-	Slim      (Job) = 'S'
-	Overwrite (Job) = 'O'
-)
-
+// CompSV determines if the singular values are to be computed in compact form.
 type CompSV byte
 
 const (
-	Compact  (CompSV) = 'P'
-	Explicit (CompSV) = 'I'
+	Compact  CompSV = 'P'
+	Explicit CompSV = 'I'
 )
 
-// Float64 defines the float64 interface for the Lapack function. This interface
-// contains the functions needed in the gonum suite.
+// Complex128 defines the public complex128 LAPACK API supported by gonum/lapack.
+type Complex128 interface{}
+
+// Float64 defines the public float64 LAPACK API supported by gonum/lapack.
 type Float64 interface {
 	Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok bool)
 }
 
-type Complex128 interface{}
+// Direct specifies the direction of the multiplication for the Householder matrix.
+type Direct byte
+
+const (
+	Forward  Direct = 'F' // Reflectors are right-multiplied, H_1 * H_2 * ... * H_k
+	Backward Direct = 'B' // Reflectors are left-multiplied, H_k * ... * H_2 * H_1
+)
+
+// StoreV indicates the storage direction of elementary reflectors.
+type StoreV byte
+
+const (
+	ColumnWise StoreV = 'C' // Reflector stored in a column of the matrix.
+	RowWise    StoreV = 'R' // Reflector stored in a row of the matrix.
+)
+
+// MatrixNorm represents the kind of matrix norm to compute.
+type MatrixNorm byte
+
+const (
+	MaxAbs       MatrixNorm = 'M' // max(abs(A(i,j)))  ('M')
+	MaxColumnSum MatrixNorm = 'O' // Maximum column sum (one norm) ('1', 'O')
+	MaxRowSum    MatrixNorm = 'I' // Maximum row sum (infinity norm) ('I', 'i')
+	NormFrob     MatrixNorm = 'F' // Frobenium norm (sqrt of sum of squares) ('F', 'f', E, 'e')
+)
+
+// MatrixType represents the kind of matrix represented in the data.
+type MatrixType byte
+
+const (
+	General MatrixType = 'G' // A dense matrix (like blas64.General).
+)
diff --git a/native/dgelq2.go b/native/dgelq2.go
new file mode 100644
index 00000000..184769d7
--- /dev/null
+++ b/native/dgelq2.go
@@ -0,0 +1,44 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import "github.com/gonum/blas"
+
+// Dgelq2 computes the LQ factorization of the m×n matrix a.
+//
+// During Dgelq2, a is modified to contain the information to construct Q and L.
+// The lower triangle of a contains the matrix L. The upper triangular elements
+// (not including the diagonal) contain the elementary reflectors. Tau is modified
+// to contain the reflector scales. Tau must have length of at least k = min(m,n)
+// and this function will panic otherwise.
+//
+// See Dgeqr2 for a description of the elementary reflectors and orthonormal
+// matrix Q. Q is constructed as a product of these elementary reflectors,
+// Q = H_k ... H_2*H_1.
+//
+// Work is temporary storage of length at least m and this function will panic otherwise.
+func (impl Implementation) Dgelq2(m, n int, a []float64, lda int, tau, work []float64) {
+	checkMatrix(m, n, a, lda)
+	k := min(m, n)
+	if len(tau) < k {
+		panic(badTau)
+	}
+	if len(work) < m {
+		panic(badWork)
+	}
+	for i := 0; i < k; i++ {
+		a[i*lda+i], tau[i] = impl.Dlarfg(n-i, a[i*lda+i], a[i*lda+min(i+1, n-1):], 1)
+		if i < m-1 {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(blas.Right, m-i-1, n-i,
+				a[i*lda+i:], 1,
+				tau[i],
+				a[(i+1)*lda+i:], lda,
+				work)
+			a[i*lda+i] = aii
+		}
+	}
+}
diff --git a/native/dgelqf.go b/native/dgelqf.go
new file mode 100644
index 00000000..bc52f3b0
--- /dev/null
+++ b/native/dgelqf.go
@@ -0,0 +1,84 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/lapack"
+)
+
+// Dgelqf computes the LQ factorization of the m×n matrix a using a blocked
+// algorithm. Please see the documentation for Dgelq2 for a description of the
+// parameters at entry and exit.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= m, and this function will panic otherwise.
+// Dgelqf is a blocked LQ factorization, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dgelqf,
+// the optimal work length will be stored into work[0].
+//
+// tau must have length at least min(m,n), and this function will panic otherwise.
+func (impl Implementation) Dgelqf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
+	nb := impl.Ilaenv(1, "DGELQF", " ", m, n, -1, -1)
+	lworkopt := m * max(nb, 1)
+	if lwork == -1 {
+		work[0] = float64(lworkopt)
+		return
+	}
+	checkMatrix(m, n, a, lda)
+	if len(work) < lwork {
+		panic(shortWork)
+	}
+	if lwork < m {
+		panic(badWork)
+	}
+	k := min(m, n)
+	if len(tau) < k {
+		panic(badTau)
+	}
+	if k == 0 {
+		return
+	}
+	// Find the optimal blocking size based on the size of available memory
+	// and optimal machine parameters.
+	nbmin := 2
+	var nx int
+	iws := m
+	ldwork := nb
+	if nb > 1 && k > nb {
+		nx = max(0, impl.Ilaenv(3, "DGELQF", " ", m, n, -1, -1))
+		if nx < k {
+			iws = m * nb
+			if lwork < iws {
+				nb = lwork / m
+				nbmin = max(2, impl.Ilaenv(2, "DGELQF", " ", m, n, -1, -1))
+			}
+		}
+	}
+	// Computed blocked LQ factorization.
+	var i int
+	if nb >= nbmin && nb < k && nx < k {
+		for i = 0; i < k-nx; i += nb {
+			ib := min(k-i, nb)
+			impl.Dgelq2(ib, n-i, a[i*lda+i:], lda, tau[i:], work)
+			if i+ib < m {
+				impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib,
+					a[i*lda+i:], lda,
+					tau[i:],
+					work, ldwork)
+				impl.Dlarfb(blas.Right, blas.NoTrans, lapack.Forward, lapack.RowWise,
+					m-i-ib, n-i, ib,
+					a[i*lda+i:], lda,
+					work, ldwork,
+					a[(i+ib)*lda+i:], lda,
+					work[ib*ldwork:], ldwork)
+			}
+		}
+	}
+	// Perform unblocked LQ factorization on the remainder.
+	if i < k {
+		impl.Dgelq2(m-i, n-i, a[i*lda+i:], lda, tau[i:], work)
+	}
+}
diff --git a/native/dgels.go b/native/dgels.go
new file mode 100644
index 00000000..7759561b
--- /dev/null
+++ b/native/dgels.go
@@ -0,0 +1,200 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/lapack"
+)
+
+// Dgels finds a minimum-norm solution based on the matrices a and b using the
+// QR or LQ factorization. Dgels returns false if the matrix
+// A is singular, and true if this solution was successfully found.
+//
+// The minimization problem solved depends on the input parameters.
+//
+//  1. If m >= n and trans == blas.NoTrans, Dgels finds X such that || A*X - B||_2
+//  is minimized.
+//  2. If m < n and trans == blas.NoTrans, Dgels finds the minimum norm solution of
+//  A * X = B.
+//  3. If m >= n and trans == blas.Trans, Dgels finds the minimum norm solution of
+//  A^T * X = B.
+//  4. If m < n and trans == blas.Trans, Dgels finds X such that || A*X - B||_2
+//  is minimized.
+// Note that the least-squares solutions (cases 1 and 3) perform the minimization
+// per column of B. This is not the same as finding the minimum-norm matrix.
+//
+// The matrix a is a general matrix of size m×n and is modified during this call.
+// The input matrix b is of size max(m,n)×nrhs, and serves two purposes. On entry,
+// the elements of b specify the input matrix B. B has size m×nrhs if
+// trans == blas.NoTrans, and n×nrhs if trans == blas.Trans. On exit, the
+// leading submatrix of b contains the solution vectors X. If trans == blas.NoTrans,
+// this submatrix is of size n×nrhs, and of size m×nrhs otherwise.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= max(m,n) + max(m,n,nrhs), and this function will panic
+// otherwise. A longer work will enable blocked algorithms to be called.
+// In the special case that lwork == -1, work[0] will be set to the optimal working
+// length.
+func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float64, lda int, b []float64, ldb int, work []float64, lwork int) bool {
+	notran := trans == blas.NoTrans
+	checkMatrix(m, n, a, lda)
+	mn := min(m, n)
+	checkMatrix(mn, nrhs, b, ldb)
+
+	// Find optimal block size.
+	tpsd := true
+	if notran {
+		tpsd = false
+	}
+	var nb int
+	if m >= n {
+		nb = impl.Ilaenv(1, "DGEQRF", " ", m, n, -1, -1)
+		if tpsd {
+			nb = max(nb, impl.Ilaenv(1, "DORMQR", "LN", m, nrhs, n, -1))
+		} else {
+			nb = max(nb, impl.Ilaenv(1, "DORMQR", "LT", m, nrhs, n, -1))
+		}
+	} else {
+		nb = impl.Ilaenv(1, "DGELQF", " ", m, n, -1, -1)
+		if tpsd {
+			nb = max(nb, impl.Ilaenv(1, "DORMLQ", "LT", n, nrhs, m, -1))
+		} else {
+			nb = max(nb, impl.Ilaenv(1, "DORMLQ", "LN", n, nrhs, m, -1))
+		}
+	}
+	if lwork == -1 {
+		work[0] = float64(max(1, mn+max(mn, nrhs)*nb))
+		return true
+	}
+
+	if len(work) < lwork {
+		panic(shortWork)
+	}
+	if lwork < mn+max(mn, nrhs) {
+		panic(badWork)
+	}
+	if m == 0 || n == 0 || nrhs == 0 {
+		impl.Dlaset(blas.All, max(m, n), nrhs, 0, 0, b, ldb)
+		return true
+	}
+
+	// Scale the input matrices if they contain extreme values.
+	smlnum := dlamchS / dlamchP
+	bignum := 1 / smlnum
+	anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
+	var iascl int
+	if anrm > 0 && anrm < smlnum {
+		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
+		iascl = 1
+	} else if anrm > bignum {
+		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
+	} else if anrm == 0 {
+		// Matrix all zeros
+		impl.Dlaset(blas.All, max(m, n), nrhs, 0, 0, b, ldb)
+		return true
+	}
+	brow := m
+	if tpsd {
+		brow = n
+	}
+	bnrm := impl.Dlange(lapack.MaxAbs, brow, nrhs, b, ldb, nil)
+	ibscl := 0
+	if bnrm > 0 && bnrm < smlnum {
+		impl.Dlascl(lapack.General, 0, 0, bnrm, smlnum, brow, nrhs, b, ldb)
+		ibscl = 1
+	} else if bnrm > bignum {
+		impl.Dlascl(lapack.General, 0, 0, bnrm, bignum, brow, nrhs, b, ldb)
+		ibscl = 2
+	}
+
+	// Solve the minimization problem using a QR or an LQ decomposition.
+	var scllen int
+	if m >= n {
+		impl.Dgeqrf(m, n, a, lda, work, work[mn:], lwork-mn)
+		if !tpsd {
+			impl.Dormqr(blas.Left, blas.Trans, m, nrhs, n,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			ok := impl.Dtrtrs(blas.Upper, blas.NoTrans, blas.NonUnit, n, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			scllen = n
+		} else {
+			ok := impl.Dtrtrs(blas.Upper, blas.Trans, blas.NonUnit, n, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			for i := n; i < m; i++ {
+				for j := 0; j < nrhs; j++ {
+					b[i*ldb+j] = 0
+				}
+			}
+			impl.Dormqr(blas.Left, blas.NoTrans, m, nrhs, n,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			scllen = m
+		}
+	} else {
+		impl.Dgelqf(m, n, a, lda, work, work[mn:], lwork-mn)
+		if !tpsd {
+			ok := impl.Dtrtrs(blas.Lower, blas.NoTrans, blas.NonUnit,
+				m, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			for i := m; i < n; i++ {
+				for j := 0; j < nrhs; j++ {
+					b[i*ldb+j] = 0
+				}
+			}
+			impl.Dormlq(blas.Left, blas.Trans, n, nrhs, m,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			scllen = n
+		} else {
+			impl.Dormlq(blas.Left, blas.NoTrans, n, nrhs, m,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			ok := impl.Dtrtrs(blas.Lower, blas.Trans, blas.NonUnit,
+				m, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+		}
+	}
+
+	// Adjust answer vector based on scaling.
+	if iascl == 1 {
+		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, scllen, nrhs, b, ldb)
+	}
+	if iascl == 2 {
+		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, scllen, nrhs, b, ldb)
+	}
+	if ibscl == 1 {
+		impl.Dlascl(lapack.General, 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb)
+	}
+	if ibscl == 2 {
+		impl.Dlascl(lapack.General, 0, 0, bignum, bnrm, scllen, nrhs, b, ldb)
+	}
+	return true
+}
diff --git a/native/dgeqr2.go b/native/dgeqr2.go
new file mode 100644
index 00000000..efae4a77
--- /dev/null
+++ b/native/dgeqr2.go
@@ -0,0 +1,57 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import "github.com/gonum/blas"
+
+// Dgeqr2 computes a QR factorization of the m×n matrix a.
+//
+// In a QR factorization, Q is an m×m orthonormal matrix, and R is an
+// upper triangular m×n matrix.
+//
+// During Dgeqr2, a is modified to contain the information to construct Q and R.
+// The upper triangle of a contains the matrix R. The lower triangular elements
+// (not including the diagonal) contain the elementary reflectors. Tau is modified
+// to contain the reflector scales. Tau must have length at least k = min(m,n), and
+// this function will panic otherwise.
+//
+// The ith elementary reflector can be explicitly constructed by first extracting
+// the
+//  v[j] = 0           j < i
+//  v[j] = i           j == i
+//  v[j] = a[i*lda+j]  j > i
+// and computing h_i = I - tau[i] * v * v^T.
+//
+// The orthonormal matrix Q can be constucted from a product of these elementary
+// reflectors, Q = H_1*H_2 ... H_k, where k = min(m,n).
+//
+// Work is temporary storage of length at least n and this function will panic otherwise.
+func (impl Implementation) Dgeqr2(m, n int, a []float64, lda int, tau, work []float64) {
+	// TODO(btracey): This is oriented such that columns of a are eliminated.
+	// This likely could be re-arranged to take better advantage of row-major
+	// storage.
+	checkMatrix(m, n, a, lda)
+	if len(work) < n {
+		panic(badWork)
+	}
+	k := min(m, n)
+	if len(tau) < k {
+		panic(badTau)
+	}
+	for i := 0; i < k; i++ {
+		// Generate elementary reflector H(i).
+		a[i*lda+i], tau[i] = impl.Dlarfg(m-i, a[i*lda+i], a[min((i+1), m-1)*lda+i:], lda)
+		if i < n-1 {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(blas.Left, m-i, n-i-1,
+				a[i*lda+i:], lda,
+				tau[i],
+				a[i*lda+i+1:], lda,
+				work)
+			a[i*lda+i] = aii
+		}
+	}
+}
diff --git a/native/dgeqrf.go b/native/dgeqrf.go
new file mode 100644
index 00000000..e7c05601
--- /dev/null
+++ b/native/dgeqrf.go
@@ -0,0 +1,98 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/lapack"
+)
+
+// Dgeqrf computes the QR factorization of the m×n matrix a using a blocked
+// algorithm. Please see the documentation for Dgeqr2 for a description of the
+// parameters at entry and exit.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= m and this function will panic otherwise.
+// Dgeqrf is a blocked LQ factorization, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dgelqf,
+// the optimal work length will be stored into work[0].
+//
+// tau must be at least len min(m,n), and this function will panic otherwise.
+func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
+	// TODO(btracey): This algorithm is oriented for column-major storage.
+	// Consider modifying the algorithm to better suit row-major storage.
+
+	// nb is the optimal blocksize, i.e. the number of columns transformed at a time.
+	nb := impl.Ilaenv(1, "DGEQRF", " ", m, n, -1, -1)
+	lworkopt := n * max(nb, 1)
+	lworkopt = max(n, lworkopt)
+	if lwork == -1 {
+		work[0] = float64(lworkopt)
+		return
+	}
+	checkMatrix(m, n, a, lda)
+	if len(work) < lwork {
+		panic(shortWork)
+	}
+	if lwork < n {
+		panic(badWork)
+	}
+	k := min(m, n)
+	if len(tau) < k {
+		panic(badTau)
+	}
+	if k == 0 {
+		return
+	}
+	nbmin := 2 // Minimal number of blocks
+	var nx int // Use unblocked (unless changed in the next for loop)
+	iws := n
+	ldwork := nb
+	// Only consider blocked if the suggested number of blocks is > 1 and the
+	// number of columns is sufficiently large.
+	if nb > 1 && k > nb {
+		// nx is the crossover point. Above this value the blocked routine should be used.
+		nx = max(0, impl.Ilaenv(3, "DGEQRF", " ", m, n, -1, -1))
+		if k > nx {
+			iws = ldwork * n
+			if lwork < iws {
+				// Not enough workspace to use the optimal number of blocks. Instead,
+				// get the maximum allowable number of blocks.
+				nb = lwork / n
+				nbmin = max(2, impl.Ilaenv(2, "DGEQRF", " ", m, n, -1, -1))
+			}
+		}
+	}
+	for i := range work {
+		work[i] = 0
+	}
+	// Compute QR using a blocked algorithm.
+	var i int
+	if nb >= nbmin && nb < k && nx < k {
+		for i = 0; i < k-nx; i += nb {
+			ib := min(k-i, nb)
+			// Compute the QR factorization of the current block.
+			impl.Dgeqr2(m-i, ib, a[i*lda+i:], lda, tau[i:], work)
+			if i+ib < n {
+				// Form the triangular factor of the block reflector and apply H^T
+				// In Dlarft, work becomes the T matrix.
+				impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib,
+					a[i*lda+i:], lda,
+					tau[i:],
+					work, ldwork)
+				impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise,
+					m-i, n-i-ib, ib,
+					a[i*lda+i:], lda,
+					work, ldwork,
+					a[i*lda+i+ib:], lda,
+					work[ib*ldwork:], ldwork)
+			}
+		}
+	}
+	// Call unblocked code on the remaining columns.
+	if i < k {
+		impl.Dgeqr2(m-i, n-i, a[i*lda+i:], lda, tau[i:], work)
+	}
+}
diff --git a/native/dlange.go b/native/dlange.go
new file mode 100644
index 00000000..fb8259a2
--- /dev/null
+++ b/native/dlange.go
@@ -0,0 +1,76 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"math"
+
+	"github.com/gonum/lapack"
+)
+
+// Dlange computes the matrix norm of the general m×n matrix a. The input norm
+// specifies the norm computed.
+//  lapack.MaxAbs: the maximum absolute value of an element.
+//  lapack.MaxColumnSum: the maximum column sum of the absolute values of the entries.
+//  lapack.MaxRowSum: the maximum row sum of the absolute values of the entries.
+//  lapack.Frobenius: the square root of the sum of the squares of the entries.
+// If norm == lapack.MaxColumnSum, work must be of length n, and this function will panic otherwise.
+// There are no restrictions on work for the other matrix norms.
+func (impl Implementation) Dlange(norm lapack.MatrixNorm, m, n int, a []float64, lda int, work []float64) float64 {
+	// TODO(btracey): These should probably be refactored to use BLAS calls.
+	checkMatrix(m, n, a, lda)
+	if m == 0 && n == 0 {
+		return 0
+	}
+	if norm == lapack.MaxAbs {
+		var value float64
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				value = math.Max(value, math.Abs(a[i*lda+j]))
+			}
+		}
+		return value
+	}
+	if norm == lapack.MaxColumnSum {
+		if len(work) < n {
+			panic(badWork)
+		}
+		for i := 0; i < n; i++ {
+			work[i] = 0
+		}
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				work[j] += math.Abs(a[i*lda+j])
+			}
+		}
+		var value float64
+		for i := 0; i < n; i++ {
+			value = math.Max(value, work[i])
+		}
+		return value
+	}
+	if norm == lapack.MaxRowSum {
+		var value float64
+		for i := 0; i < m; i++ {
+			var sum float64
+			for j := 0; j < n; j++ {
+				sum += math.Abs(a[i*lda+j])
+			}
+			value = math.Max(value, sum)
+		}
+		return value
+	}
+	if norm == lapack.NormFrob {
+		var value float64
+		scale := 0.0
+		sum := 1.0
+		for i := 0; i < m; i++ {
+			scale, sum = impl.Dlassq(n, a[i*lda:], 1, scale, sum)
+		}
+		value = scale * math.Sqrt(sum)
+		return value
+	}
+	panic("lapack: bad matrix norm")
+}
diff --git a/native/dlapy2.go b/native/dlapy2.go
new file mode 100644
index 00000000..172a1174
--- /dev/null
+++ b/native/dlapy2.go
@@ -0,0 +1,12 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import "math"
+
+// Dlapy2 is the LAPACK version of math.Hypot.
+func (Implementation) Dlapy2(x, y float64) float64 {
+	return math.Hypot(x, y)
+}
diff --git a/native/dlarf.go b/native/dlarf.go
index d86a39eb..ad473fbe 100644
--- a/native/dlarf.go
+++ b/native/dlarf.go
@@ -17,25 +17,37 @@ import (
 //  h = 1 - tau * v * v^T
 // and c is an m * n matrix.
 //
-// Work is pre-allocated memory of size at least m if side == Left and at least
-// n if side == Right. Dlarf will panic if this length requirement is not met.
+
+// Work is temporary storage of length at least m if side == Left and at least
+// n if side == Right. This function will panic if this length requirement is not met.
 func (impl Implementation) Dlarf(side blas.Side, m, n int, v []float64, incv int, tau float64, c []float64, ldc int, work []float64) {
 	applyleft := side == blas.Left
 	if (applyleft && len(work) < n) || (!applyleft && len(work) < m) {
-		panic("dlarf: insufficient work length")
+		panic(badWork)
 	}
+	checkMatrix(m, n, c, ldc)
+
+	// v has length m if applyleft and n otherwise.
+	lenV := n
+	if applyleft {
+		lenV = m
+	}
+
+	checkVector(lenV, v, incv)
+
 	lastv := 0 // last non-zero element of v
 	lastc := 0 // last non-zero row/column of c
 	if tau != 0 {
 		var i int
 		if applyleft {
-			lastv = m
+			lastv = m - 1
 		} else {
-			lastv = n
+			lastv = n - 1
 		}
 		if incv > 0 {
-			i = (lastv - 1) * incv
+			i = lastv * incv
 		}
+
 		// Look for the last non-zero row in v.
 		for lastv >= 0 && v[i] == 0 {
 			lastv--
@@ -43,10 +55,10 @@ func (impl Implementation) Dlarf(side blas.Side, m, n int, v []float64, incv int
 		}
 		if applyleft {
 			// Scan for the last non-zero column in C[0:lastv, :]
-			lastc = impl.Iladlc(lastv, n, c, ldc)
+			lastc = impl.Iladlc(lastv+1, n, c, ldc)
 		} else {
 			// Scan for the last non-zero row in C[:, 0:lastv]
-			lastc = impl.Iladlr(m, lastv, c, ldc)
+			lastc = impl.Iladlr(m, lastv+1, c, ldc)
 		}
 	}
 	if lastv == -1 || lastc == -1 {
diff --git a/native/dlarfb.go b/native/dlarfb.go
new file mode 100644
index 00000000..927dd9a5
--- /dev/null
+++ b/native/dlarfb.go
@@ -0,0 +1,424 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/lapack"
+)
+
+// Dlarfb applies a block reflector to a matrix.
+//
+// In the call to Dlarfb, the mxn c is multiplied by the implicitly defined matrix h as follows:
+//  c = h * c if side == Left and trans == NoTrans
+//  c = c * h if side == Right and trans == NoTrans
+//  c = h^T * c if side == Left and trans == Trans
+//  c = c * h^t if side == Right and trans == Trans
+// h is a product of elementary reflectors. direct sets the direction of multiplication
+//  h = h_1 * h_2 * ... * h_k if direct == Forward
+//  h = h_k * h_k-1 * ... * h_1 if direct == Backward
+// The combination of direct and store defines the orientation of the elementary
+// reflectors. In all cases the ones on the diagonal are implicitly represented.
+//
+// If direct == lapack.Forward and store == lapack.ColumnWise
+//  V = (  1       )
+//      ( v1  1    )
+//      ( v1 v2  1 )
+//      ( v1 v2 v3 )
+//      ( v1 v2 v3 )
+// If direct == lapack.Forward and store == lapack.RowWise
+//  V = (  1 v1 v1 v1 v1 )
+//      (     1 v2 v2 v2 )
+//      (        1 v3 v3 )
+// If direct == lapack.Backward and store == lapack.ColumnWise
+//  V = ( v1 v2 v3 )
+//      ( v1 v2 v3 )
+//      (  1 v2 v3 )
+//      (     1 v3 )
+//      (        1 )
+// If direct == lapack.Backward and store == lapack.RowWise
+//  V = ( v1 v1  1       )
+//      ( v2 v2 v2  1    )
+//      ( v3 v3 v3 v3  1 )
+// An elementary reflector can be explicitly constructed by extracting the
+// corresponding elements of v, placing a 1 where the diagonal would be, and
+// placing zeros in the remaining elements.
+//
+// t is a k×k matrix containing the block reflector, and this function will panic
+// if t is not of sufficient size. See Dlarft for more information.
+//
+// Work is a temporary storage matrix with stride ldwork.
+// Work must be of size at least n×k side == Left and m×k if side == Right, and
+// this function will panic if this size is not met.
+func (Implementation) Dlarfb(side blas.Side, trans blas.Transpose, direct lapack.Direct,
+	store lapack.StoreV, m, n, k int, v []float64, ldv int, t []float64, ldt int,
+	c []float64, ldc int, work []float64, ldwork int) {
+
+	checkMatrix(m, n, c, ldc)
+	if m == 0 || n == 0 {
+		return
+	}
+	if k < 0 {
+		panic("lapack: negative number of transforms")
+	}
+	if side != blas.Left && side != blas.Right {
+		panic(badSide)
+	}
+	if trans != blas.Trans && trans != blas.NoTrans {
+		panic(badTrans)
+	}
+	if direct != lapack.Forward && direct != lapack.Backward {
+		panic(badDirect)
+	}
+	if store != lapack.ColumnWise && store != lapack.RowWise {
+		panic(badStore)
+	}
+
+	rowsWork := n
+	if side == blas.Right {
+		rowsWork = m
+	}
+	checkMatrix(rowsWork, k, work, ldwork)
+
+	bi := blas64.Implementation()
+
+	transt := blas.Trans
+	if trans == blas.Trans {
+		transt = blas.NoTrans
+	}
+	// TODO(btracey): This follows the original Lapack code where the
+	// elements are copied into the columns of the working array. The
+	// loops should go in the other direction so the data is written
+	// into the rows of work so the copy is not strided. A bigger change
+	// would be to replace work with work^T, but benchmarks would be
+	// needed to see if the change is merited.
+	if store == lapack.ColumnWise {
+		if direct == lapack.Forward {
+			// V1 is the first k rows of C. V2 is the remaining rows.
+			if side == blas.Left {
+				// W = C^T V = C1^T V1 + C2^T V2 (stored in work).
+
+				// W = C1.
+				for j := 0; j < k; j++ {
+					bi.Dcopy(n, c[j*ldc:], 1, work[j:], ldwork)
+				}
+				// W = W * V1.
+				bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit,
+					n, k, 1,
+					v, ldv,
+					work, ldwork)
+				if m > k {
+					// W = W + C2^T V2.
+					bi.Dgemm(blas.Trans, blas.NoTrans, n, k, m-k,
+						1, c[k*ldc:], ldc, v[k*ldv:], ldv,
+						1, work, ldwork)
+				}
+				// W = W * T^T or W * T.
+				bi.Dtrmm(blas.Right, blas.Upper, transt, blas.NonUnit, n, k,
+					1, t, ldt,
+					work, ldwork)
+				// C -= V * W^T.
+				if m > k {
+					// C2 -= V2 * W^T.
+					bi.Dgemm(blas.NoTrans, blas.Trans, m-k, n, k,
+						-1, v[k*ldv:], ldv, work, ldwork,
+						1, c[k*ldc:], ldc)
+				}
+				// W *= V1^T.
+				bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, n, k,
+					1, v, ldv,
+					work, ldwork)
+				// C1 -= W^T.
+				// TODO(btracey): This should use blas.Axpy.
+				for i := 0; i < n; i++ {
+					for j := 0; j < k; j++ {
+						c[j*ldc+i] -= work[i*ldwork+j]
+					}
+				}
+				return
+			}
+			// Form C = C * H or C * H^T, where C = (C1 C2).
+
+			// W = C1.
+			for i := 0; i < k; i++ {
+				bi.Dcopy(m, c[i:], ldc, work[i:], ldwork)
+			}
+			// W *= V1.
+			bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, m, k,
+				1, v, ldv,
+				work, ldwork)
+			if n > k {
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, m, k, n-k,
+					1, c[k:], ldc, v[k*ldv:], ldv,
+					1, work, ldwork)
+			}
+			// W *= T or T^T.
+			bi.Dtrmm(blas.Right, blas.Upper, trans, blas.NonUnit, m, k,
+				1, t, ldt,
+				work, ldwork)
+			if n > k {
+				bi.Dgemm(blas.NoTrans, blas.Trans, m, n-k, k,
+					-1, work, ldwork, v[k*ldv:], ldv,
+					1, c[k:], ldc)
+			}
+			// C -= W * V^T.
+			bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, m, k,
+				1, v, ldv,
+				work, ldwork)
+			// C -= W.
+			// TODO(btracey): This should use blas.Axpy.
+			for i := 0; i < m; i++ {
+				for j := 0; j < k; j++ {
+					c[i*ldc+j] -= work[i*ldwork+j]
+				}
+			}
+			return
+		}
+		// V = (V1)
+		//   = (V2) (last k rows)
+		// Where V2 is unit upper triangular.
+		if side == blas.Left {
+			// Form H * C or
+			// W = C^T V.
+
+			// W = C2^T.
+			for j := 0; j < k; j++ {
+				bi.Dcopy(n, c[(m-k+j)*ldc:], 1, work[j:], ldwork)
+			}
+			// W *= V2.
+			bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, n, k,
+				1, v[(m-k)*ldv:], ldv,
+				work, ldwork)
+			if m > k {
+				// W += C1^T * V1.
+				bi.Dgemm(blas.Trans, blas.NoTrans, n, k, m-k,
+					1, c, ldc, v, ldv,
+					1, work, ldwork)
+			}
+			// W *= T or T^T.
+			bi.Dtrmm(blas.Right, blas.Lower, transt, blas.NonUnit, n, k,
+				1, t, ldt,
+				work, ldwork)
+			// C -= V * W^T.
+			if m > k {
+				bi.Dgemm(blas.NoTrans, blas.Trans, m-k, n, k,
+					-1, v, ldv, work, ldwork,
+					1, c, ldc)
+			}
+			// W *= V2^T.
+			bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, n, k,
+				1, v[(m-k)*ldv:], ldv,
+				work, ldwork)
+			// C2 -= W^T.
+			// TODO(btracey): This should use blas.Axpy.
+			for i := 0; i < n; i++ {
+				for j := 0; j < k; j++ {
+					c[(m-k+j)*ldc+i] -= work[i*ldwork+j]
+				}
+			}
+			return
+		}
+		// Form C * H or C * H^T where C = (C1 C2).
+		// W = C * V.
+
+		// W = C2.
+		for j := 0; j < k; j++ {
+			bi.Dcopy(m, c[n-k+j:], ldc, work[j:], ldwork)
+		}
+
+		// W = W * V2.
+		bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, m, k,
+			1, v[(n-k)*ldv:], ldv,
+			work, ldwork)
+		if n > k {
+			bi.Dgemm(blas.NoTrans, blas.NoTrans, m, k, n-k,
+				1, c, ldc, v, ldv,
+				1, work, ldwork)
+		}
+		// W *= T or T^T.
+		bi.Dtrmm(blas.Right, blas.Lower, trans, blas.NonUnit, m, k,
+			1, t, ldt,
+			work, ldwork)
+		// C -= W * V^T.
+		if n > k {
+			// C1 -= W * V1^T.
+			bi.Dgemm(blas.NoTrans, blas.Trans, m, n-k, k,
+				-1, work, ldwork, v, ldv,
+				1, c, ldc)
+		}
+		// W *= V2^T.
+		bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, m, k,
+			1, v[(n-k)*ldv:], ldv,
+			work, ldwork)
+		// C2 -= W.
+		// TODO(btracey): This should use blas.Axpy.
+		for i := 0; i < m; i++ {
+			for j := 0; j < k; j++ {
+				c[i*ldc+n-k+j] -= work[i*ldwork+j]
+			}
+		}
+		return
+	}
+	// Store = Rowwise.
+	if direct == lapack.Forward {
+		// V = (V1 V2) where v1 is unit upper triangular.
+		if side == blas.Left {
+			// Form H * C or H^T * C where C = (C1; C2).
+			// W = C^T * V^T.
+
+			// W = C1^T.
+			for j := 0; j < k; j++ {
+				bi.Dcopy(n, c[j*ldc:], 1, work[j:], ldwork)
+			}
+			// W *= V1^T.
+			bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, n, k,
+				1, v, ldv,
+				work, ldwork)
+			if m > k {
+				bi.Dgemm(blas.Trans, blas.Trans, n, k, m-k,
+					1, c[k*ldc:], ldc, v[k:], ldv,
+					1, work, ldwork)
+			}
+			// W *= T or T^T.
+			bi.Dtrmm(blas.Right, blas.Upper, transt, blas.NonUnit, n, k,
+				1, t, ldt,
+				work, ldwork)
+			// C -= V^T * W^T.
+			if m > k {
+				bi.Dgemm(blas.Trans, blas.Trans, m-k, n, k,
+					-1, v[k:], ldv, work, ldwork,
+					1, c[k*ldc:], ldc)
+			}
+			// W *= V1.
+			bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, n, k,
+				1, v, ldv,
+				work, ldwork)
+			// C1 -= W^T.
+			// TODO(btracey): This should use blas.Axpy.
+			for i := 0; i < n; i++ {
+				for j := 0; j < k; j++ {
+					c[j*ldc+i] -= work[i*ldwork+j]
+				}
+			}
+			return
+		}
+		// Form C * H or C * H^T where C = (C1 C2).
+		// W = C * V^T.
+
+		// W = C1.
+		for j := 0; j < k; j++ {
+			bi.Dcopy(m, c[j:], ldc, work[j:], ldwork)
+		}
+		// W *= V1^T.
+		bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, m, k,
+			1, v, ldv,
+			work, ldwork)
+		if n > k {
+			bi.Dgemm(blas.NoTrans, blas.Trans, m, k, n-k,
+				1, c[k:], ldc, v[k:], ldv,
+				1, work, ldwork)
+		}
+		// W *= T or T^T.
+		bi.Dtrmm(blas.Right, blas.Upper, trans, blas.NonUnit, m, k,
+			1, t, ldt,
+			work, ldwork)
+		// C -= W * V.
+		if n > k {
+			bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n-k, k,
+				-1, work, ldwork, v[k:], ldv,
+				1, c[k:], ldc)
+		}
+		// W *= V1.
+		bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, m, k,
+			1, v, ldv,
+			work, ldwork)
+		// C1 -= W.
+		// TODO(btracey): This should use blas.Axpy.
+		for i := 0; i < m; i++ {
+			for j := 0; j < k; j++ {
+				c[i*ldc+j] -= work[i*ldwork+j]
+			}
+		}
+		return
+	}
+	// V = (V1 V2) where V2 is the last k columns and is lower unit triangular.
+	if side == blas.Left {
+		// Form H * C or H^T C where C = (C1 ; C2).
+		// W = C^T * V^T.
+
+		// W = C2^T.
+		for j := 0; j < k; j++ {
+			bi.Dcopy(n, c[(m-k+j)*ldc:], 1, work[j:], ldwork)
+		}
+		// W *= V2^T.
+		bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, n, k,
+			1, v[m-k:], ldv,
+			work, ldwork)
+		if m > k {
+			bi.Dgemm(blas.Trans, blas.Trans, n, k, m-k,
+				1, c, ldc, v, ldv,
+				1, work, ldwork)
+		}
+		// W *= T or T^T.
+		bi.Dtrmm(blas.Right, blas.Lower, transt, blas.NonUnit, n, k,
+			1, t, ldt,
+			work, ldwork)
+		// C -= V^T * W^T.
+		if m > k {
+			bi.Dgemm(blas.Trans, blas.Trans, m-k, n, k,
+				-1, v, ldv, work, ldwork,
+				1, c, ldc)
+		}
+		// W *= V2.
+		bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, n, k,
+			1, v[m-k:], ldv,
+			work, ldwork)
+		// C2 -= W^T.
+		// TODO(btracey): This should use blas.Axpy.
+		for i := 0; i < n; i++ {
+			for j := 0; j < k; j++ {
+				c[(m-k+j)*ldc+i] -= work[i*ldwork+j]
+			}
+		}
+		return
+	}
+	// Form C * H or C * H^T where C = (C1 C2).
+	// W = C * V^T.
+	// W = C2.
+	for j := 0; j < k; j++ {
+		bi.Dcopy(m, c[n-k+j:], ldc, work[j:], ldwork)
+	}
+	// W *= V2^T.
+	bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, m, k,
+		1, v[n-k:], ldv,
+		work, ldwork)
+	if n > k {
+		bi.Dgemm(blas.NoTrans, blas.Trans, m, k, n-k,
+			1, c, ldc, v, ldv,
+			1, work, ldwork)
+	}
+	// W *= T or T^T.
+	bi.Dtrmm(blas.Right, blas.Lower, trans, blas.NonUnit, m, k,
+		1, t, ldt,
+		work, ldwork)
+	// C -= W * V.
+	if n > k {
+		bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n-k, k,
+			-1, work, ldwork, v, ldv,
+			1, c, ldc)
+	}
+	// W *= V2.
+	bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, m, k,
+		1, v[n-k:], ldv,
+		work, ldwork)
+	// C1 -= W.
+	// TODO(btracey): This should use blas.Axpy.
+	for i := 0; i < m; i++ {
+		for j := 0; j < k; j++ {
+			c[i*ldc+n-k+j] -= work[i*ldwork+j]
+		}
+	}
+}
diff --git a/native/dlarfg.go b/native/dlarfg.go
new file mode 100644
index 00000000..bd1edf6e
--- /dev/null
+++ b/native/dlarfg.go
@@ -0,0 +1,60 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"math"
+
+	"github.com/gonum/blas/blas64"
+)
+
+// Dlarfg generates an elementary reflector for a Householder matrix. It creates
+// a real elementary reflector of order n such that
+//  H * (alpha) = (beta)
+//      (    x)   (   0)
+//  H^T * H = I
+// H is represented in the form
+//  H = 1 - tau * (1; v) * (1 v^T)
+// where tau is a real scalar.
+//
+// On entry, x contains the vector x, on exit it contains v.
+func (impl Implementation) Dlarfg(n int, alpha float64, x []float64, incX int) (beta, tau float64) {
+	if n < 0 {
+		panic(nLT0)
+	}
+	if n <= 1 {
+		return alpha, 0
+	}
+	checkVector(n-1, x, incX)
+	bi := blas64.Implementation()
+	xnorm := bi.Dnrm2(n-1, x, incX)
+	if xnorm == 0 {
+		return alpha, 0
+	}
+	beta = -math.Copysign(impl.Dlapy2(alpha, xnorm), alpha)
+	safmin := dlamchS / dlamchE
+	knt := 0
+	if math.Abs(beta) < safmin {
+		// xnorm and beta may be innacurate, scale x and recompute.
+		rsafmn := 1 / safmin
+		for {
+			knt++
+			bi.Dscal(n-1, rsafmn, x, incX)
+			beta *= rsafmn
+			alpha *= rsafmn
+			if math.Abs(beta) >= safmin {
+				break
+			}
+		}
+		xnorm = bi.Dnrm2(n-1, x, incX)
+		beta = -math.Copysign(impl.Dlapy2(alpha, xnorm), alpha)
+	}
+	tau = (beta - alpha) / beta
+	bi.Dscal(n-1, 1/(alpha-beta), x, incX)
+	for j := 0; j < knt; j++ {
+		beta *= safmin
+	}
+	return beta, tau
+}
diff --git a/native/dlarft.go b/native/dlarft.go
new file mode 100644
index 00000000..f99d43d1
--- /dev/null
+++ b/native/dlarft.go
@@ -0,0 +1,148 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/lapack"
+)
+
+// Dlarft forms the triangular factor t of a block reflector, storing the answer
+// in t.
+//  H = 1 - V * T * V^T if store == lapack.ColumnWise
+//  H = 1 - V^T * T * V if store == lapack.RowWise
+// H is defined by a product of the elementary reflectors where
+//  H = H_1 * H_2 * ... * H_k if direct == lapack.Forward
+//  H = H_k * H_k-1 * ... * H_1 if direct == lapack.Backward
+//
+// t is a k×k triangular matrix. t is upper triangular if direct = lapack.Forward
+// and lower triangular otherwise. This function will panic if t is not of
+// sufficient size.
+//
+// store describes the storage of the elementary reflectors in v. Please see
+// Dlarfb for a description of layout.
+//
+// tau contains the scalar factor of the elementary reflectors h.
+func (Implementation) Dlarft(direct lapack.Direct, store lapack.StoreV, n, k int,
+	v []float64, ldv int, tau []float64, t []float64, ldt int) {
+	if n == 0 {
+		return
+	}
+	if n < 0 || k < 0 {
+		panic(negDimension)
+	}
+	if direct != lapack.Forward && direct != lapack.Backward {
+		panic(badDirect)
+	}
+	if store != lapack.RowWise && store != lapack.ColumnWise {
+		panic(badStore)
+	}
+	if len(tau) < k {
+		panic(badTau)
+	}
+	checkMatrix(k, k, t, ldt)
+	bi := blas64.Implementation()
+	// TODO(btracey): There are a number of minor obvious loop optimizations here.
+	// TODO(btracey): It may be possible to rearrange some of the code so that
+	// index of 1 is more common in the Dgemv.
+	if direct == lapack.Forward {
+		prevlastv := n - 1
+		for i := 0; i < k; i++ {
+			prevlastv = max(i, prevlastv)
+			if tau[i] == 0 {
+				for j := 0; j <= i; j++ {
+					t[j*ldt+i] = 0
+				}
+				continue
+			}
+			var lastv int
+			if store == lapack.ColumnWise {
+				// skip trailing zeros
+				for lastv = n - 1; lastv >= i+1; lastv-- {
+					if v[lastv*ldv+i] != 0 {
+						break
+					}
+				}
+				for j := 0; j < i; j++ {
+					t[j*ldt+i] = -tau[i] * v[i*ldv+j]
+				}
+				j := min(lastv, prevlastv)
+				bi.Dgemv(blas.Trans, j-i, i,
+					-tau[i], v[(i+1)*ldv:], ldv, v[(i+1)*ldv+i:], ldv,
+					1, t[i:], ldt)
+			} else {
+				for lastv = n - 1; lastv >= i+1; lastv-- {
+					if v[i*ldv+lastv] != 0 {
+						break
+					}
+				}
+				for j := 0; j < i; j++ {
+					t[j*ldt+i] = -tau[i] * v[j*ldv+i]
+				}
+				j := min(lastv, prevlastv)
+				bi.Dgemv(blas.NoTrans, i, j-i,
+					-tau[i], v[i+1:], ldv, v[i*ldv+i+1:], 1,
+					1, t[i:], ldt)
+			}
+			bi.Dtrmv(blas.Upper, blas.NoTrans, blas.NonUnit, i, t, ldt, t[i:], ldt)
+			t[i*ldt+i] = tau[i]
+			if i > 1 {
+				prevlastv = max(prevlastv, lastv)
+			} else {
+				prevlastv = lastv
+			}
+		}
+		return
+	}
+	prevlastv := 0
+	for i := k - 1; i >= 0; i-- {
+		if tau[i] == 0 {
+			for j := i; j < k; j++ {
+				t[j*ldt+i] = 0
+			}
+			continue
+		}
+		var lastv int
+		if i < k-1 {
+			if store == lapack.ColumnWise {
+				for lastv = 0; lastv < i; lastv++ {
+					if v[lastv*ldv+i] != 0 {
+						break
+					}
+				}
+				for j := i + 1; j < k; j++ {
+					t[j*ldt+i] = -tau[i] * v[(n-k+i)*ldv+j]
+				}
+				j := max(lastv, prevlastv)
+				bi.Dgemv(blas.Trans, n-k+i-j, k-i-1,
+					-tau[i], v[j*ldv+i+1:], ldv, v[j*ldv+i:], ldv,
+					1, t[(i+1)*ldt+i:], ldt)
+			} else {
+				for lastv := 0; lastv < i; lastv++ {
+					if v[i*ldv+lastv] != 0 {
+						break
+					}
+				}
+				for j := i + 1; j < k; j++ {
+					t[j*ldt+i] = -tau[i] * v[j*ldv+n-k+i]
+				}
+				j := max(lastv, prevlastv)
+				bi.Dgemv(blas.NoTrans, k-i-1, n-k+i-j,
+					-tau[i], v[(i+1)*ldv+j:], ldv, v[i*ldv+j:], 1,
+					1, t[(i+1)*ldt+i:], ldt)
+			}
+			bi.Dtrmv(blas.Lower, blas.NoTrans, blas.NonUnit, k-i-1,
+				t[(i+1)*ldt+i+1:], ldt,
+				t[(i+1)*ldt+i:], ldt)
+			if i > 0 {
+				prevlastv = min(prevlastv, lastv)
+			} else {
+				prevlastv = lastv
+			}
+		}
+		t[i*ldt+i] = tau[i]
+	}
+}
diff --git a/native/dlascl.go b/native/dlascl.go
new file mode 100644
index 00000000..93245723
--- /dev/null
+++ b/native/dlascl.go
@@ -0,0 +1,72 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"math"
+
+	"github.com/gonum/lapack"
+)
+
+// Dlascl multiplies a rectangular matrix by a scalar.
+func (impl Implementation) Dlascl(kind lapack.MatrixType, kl, ku int, cfrom, cto float64, m, n int, a []float64, lda int) {
+	checkMatrix(m, n, a, lda)
+	if cfrom == 0 {
+		panic("dlascl: zero divisor")
+	}
+	if math.IsNaN(cfrom) || math.IsNaN(cto) {
+		panic("dlascl: NaN scale factor")
+	}
+	if n == 0 || m == 0 {
+		return
+	}
+	smlnum := dlamchS
+	bignum := 1 / smlnum
+	cfromc := cfrom
+	ctoc := cto
+	cfrom1 := cfromc * smlnum
+	for {
+		var done bool
+		var mul, ctol float64
+		if cfrom1 == cfromc {
+			// cfromc is inf
+			mul = ctoc / cfromc
+			done = true
+			ctol = ctoc
+		} else {
+			ctol = ctoc / bignum
+			if ctol == ctoc {
+				// ctoc is either 0 or inf.
+				mul = ctoc
+				done = true
+				cfromc = 1
+			} else if math.Abs(cfrom1) > math.Abs(ctoc) && ctoc != 0 {
+				mul = smlnum
+				done = false
+				cfromc = cfrom1
+			} else if math.Abs(ctol) > math.Abs(cfromc) {
+				mul = bignum
+				done = false
+				ctoc = ctol
+			} else {
+				mul = ctoc / cfromc
+				done = true
+			}
+		}
+		switch kind {
+		default:
+			panic("lapack: not implemented")
+		case lapack.General:
+			for i := 0; i < m; i++ {
+				for j := 0; j < n; j++ {
+					a[i*lda+j] = a[i*lda+j] * mul
+				}
+			}
+		}
+		if done {
+			break
+		}
+	}
+}
diff --git a/native/dlaset.go b/native/dlaset.go
new file mode 100644
index 00000000..33cfc438
--- /dev/null
+++ b/native/dlaset.go
@@ -0,0 +1,37 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import "github.com/gonum/blas"
+
+// Dlaset sets the off-diagonal elements of a to alpha, and the diagonal elements
+// of a to beta. If uplo == blas.Upper, only the upper diagonal elements are set.
+// If uplo == blas.Lower, only the lower diagonal elements are set. If uplo is
+// otherwise, all of the elements of a are set.
+func (impl Implementation) Dlaset(uplo blas.Uplo, m, n int, alpha, beta float64, a []float64, lda int) {
+	checkMatrix(m, n, a, lda)
+	if uplo == blas.Upper {
+		for i := 0; i < m; i++ {
+			for j := i + 1; j < n; j++ {
+				a[i*lda+j] = alpha
+			}
+		}
+	} else if uplo == blas.Lower {
+		for i := 0; i < m; i++ {
+			for j := 0; j < i; j++ {
+				a[i*lda+j] = alpha
+			}
+		}
+	} else {
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				a[i*lda+j] = alpha
+			}
+		}
+	}
+	for i := 0; i < min(m, n); i++ {
+		a[i*lda+i] = beta
+	}
+}
diff --git a/native/dlassq.go b/native/dlassq.go
new file mode 100644
index 00000000..7b98c700
--- /dev/null
+++ b/native/dlassq.go
@@ -0,0 +1,29 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import "math"
+
+// Dlassq updates a sum of squares in scaled form. The input parameters scale and
+// sumsq represent the current scale and total sum of squares. These values are
+// updated with the information in the first n elements of the vector specified
+// by x and incX.
+func (impl Implementation) Dlassq(n int, x []float64, incx int, scale float64, sumsq float64) (scl, smsq float64) {
+	if n <= 0 {
+		return scale, sumsq
+	}
+	for ix := 0; ix <= (n-1)*incx; ix += incx {
+		absxi := math.Abs(x[ix])
+		if absxi > 0 || math.IsNaN(absxi) {
+			if scale < absxi {
+				sumsq = 1 + sumsq*(scale/absxi)*(scale/absxi)
+				scale = absxi
+			} else {
+				sumsq += (absxi / scale) * (absxi / scale)
+			}
+		}
+	}
+	return scale, sumsq
+}
diff --git a/native/dorm2r.go b/native/dorm2r.go
new file mode 100644
index 00000000..031480a7
--- /dev/null
+++ b/native/dorm2r.go
@@ -0,0 +1,86 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import "github.com/gonum/blas"
+
+// Dorm2r multiplies a general matrix c by an orthogonal matrix from a QR factorization
+// determined by Dgeqrf.
+//  C = Q * C    if side == blas.Left and trans == blas.NoTrans
+//  C = Q^T * C  if side == blas.Left and trans == blas.Trans
+//  C = C * Q    if side == blas.Right and trans == blas.NoTrans
+//  C = C * Q^T  if side == blas.Right and trans == blas.Trans
+// If side == blas.Left, a is a matrix of size m×k, and if side == blas.Right
+// a is of size n×k.
+//
+// Tau contains the householder factors and is of length at least k and this function
+// will panic otherwise.
+//
+// Work is temporary storage of length at least n if side == blas.Left
+// and at least m if side == blas.Right and this function will panic otherwise.
+func (impl Implementation) Dorm2r(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) {
+	if side != blas.Left && side != blas.Right {
+		panic(badSide)
+	}
+	if trans != blas.Trans && trans != blas.NoTrans {
+		panic(badTrans)
+	}
+
+	left := side == blas.Left
+	notran := trans == blas.NoTrans
+	if left {
+		// Q is m x m
+		checkMatrix(m, k, a, lda)
+		if len(work) < n {
+			panic(badWork)
+		}
+	} else {
+		// Q is n x n
+		checkMatrix(n, k, a, lda)
+		if len(work) < m {
+			panic(badWork)
+		}
+	}
+	checkMatrix(m, n, c, ldc)
+	if m == 0 || n == 0 || k == 0 {
+		return
+	}
+	if len(tau) < k {
+		panic(badTau)
+	}
+	if left {
+		if notran {
+			for i := k - 1; i >= 0; i-- {
+				aii := a[i*lda+i]
+				a[i*lda+i] = 1
+				impl.Dlarf(side, m-i, n, a[i*lda+i:], lda, tau[i], c[i*ldc:], ldc, work)
+				a[i*lda+i] = aii
+			}
+			return
+		}
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m-i, n, a[i*lda+i:], lda, tau[i], c[i*ldc:], ldc, work)
+			a[i*lda+i] = aii
+		}
+		return
+	}
+	if notran {
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m, n-i, a[i*lda+i:], lda, tau[i], c[i:], ldc, work)
+			a[i*lda+i] = aii
+		}
+		return
+	}
+	for i := k - 1; i >= 0; i-- {
+		aii := a[i*lda+i]
+		a[i*lda+i] = 1
+		impl.Dlarf(side, m, n-i, a[i*lda+i:], lda, tau[i], c[i:], ldc, work)
+		a[i*lda+i] = aii
+	}
+}
diff --git a/native/dorml2.go b/native/dorml2.go
new file mode 100644
index 00000000..9d1585cd
--- /dev/null
+++ b/native/dorml2.go
@@ -0,0 +1,83 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import "github.com/gonum/blas"
+
+// Dorml2 multiplies a general matrix c by an orthogonal matrix from an LQ factorization
+// determined by Dgelqf.
+//  C = Q * C    if side == blas.Left and trans == blas.NoTrans
+//  C = Q^T * C  if side == blas.Left and trans == blas.Trans
+//  C = C * Q    if side == blas.Right and trans == blas.NoTrans
+//  C = C * Q^T  if side == blas.Right and trans == blas.Trans
+// If side == blas.Left, a is a matrix of side k×m, and if side == blas.Right
+// a is of size k×n.
+//
+//
+// Tau contains the householder factors and is of length at least k and this function will
+// panic otherwise.
+//
+// Work is temporary storage of length at least n if side == blas.Left
+// and at least m if side == blas.Right and this function will panic otherwise.
+func (impl Implementation) Dorml2(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) {
+	if side != blas.Left && side != blas.Right {
+		panic(badSide)
+	}
+	if trans != blas.Trans && trans != blas.NoTrans {
+		panic(badTrans)
+	}
+
+	left := side == blas.Left
+	notran := trans == blas.NoTrans
+	if left {
+		checkMatrix(k, m, a, lda)
+		if len(work) < n {
+			panic(badWork)
+		}
+	} else {
+		checkMatrix(k, n, a, lda)
+		if len(work) < m {
+			panic(badWork)
+		}
+	}
+	checkMatrix(m, n, c, ldc)
+	if m == 0 || n == 0 || k == 0 {
+		return
+	}
+	switch {
+	case left && notran:
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m-i, n, a[i*lda+i:], 1, tau[i], c[i*ldc:], ldc, work)
+			a[i*lda+i] = aii
+		}
+		return
+	case left && !notran:
+		for i := k - 1; i >= 0; i-- {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m-i, n, a[i*lda+i:], 1, tau[i], c[i*ldc:], ldc, work)
+			a[i*lda+i] = aii
+		}
+		return
+	case !left && notran:
+		for i := k - 1; i >= 0; i-- {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m, n-i, a[i*lda+i:], 1, tau[i], c[i:], ldc, work)
+			a[i*lda+i] = aii
+		}
+		return
+	case !left && !notran:
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m, n-i, a[i*lda+i:], 1, tau[i], c[i:], ldc, work)
+			a[i*lda+i] = aii
+		}
+		return
+	}
+}
diff --git a/native/dormlq.go b/native/dormlq.go
new file mode 100644
index 00000000..4d3cedf6
--- /dev/null
+++ b/native/dormlq.go
@@ -0,0 +1,155 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/lapack"
+)
+
+// Dormlq multiplies the matrix c by the othogonal matrix q defined by the
+// slices a and tau. A and tau are as returned from Dgelqf.
+//  C = Q * C    if side == blas.Left and trans == blas.NoTrans
+//  C = Q^T * C  if side == blas.Left and trans == blas.Trans
+//  C = C * Q    if side == blas.Right and trans == blas.NoTrans
+//  C = C * Q^T  if side == blas.Right and trans == blas.Trans
+// If side == blas.Left, a is a matrix of side k×m, and if side == blas.Right
+// a is of size k×n. This uses a blocked algorithm.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= m if side == blas.Left and lwork >= n if side == blas.Right,
+// and this function will panic otherwise.
+// Dormlq uses a block algorithm, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dormlq,
+// the optimal work length will be stored into work[0].
+//
+// Tau contains the householder scales and must have length at least k, and
+// this function will panic otherwise.
+func (impl Implementation) Dormlq(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
+	if side != blas.Left && side != blas.Right {
+		panic(badSide)
+	}
+	if trans != blas.Trans && trans != blas.NoTrans {
+		panic(badTrans)
+	}
+	left := side == blas.Left
+	notran := trans == blas.NoTrans
+	if left {
+		checkMatrix(k, m, a, lda)
+	} else {
+		checkMatrix(k, n, a, lda)
+	}
+	checkMatrix(m, n, c, ldc)
+	if len(tau) < k {
+		panic(badTau)
+	}
+
+	const nbmax = 64
+	nw := n
+	if !left {
+		nw = m
+	}
+	opts := string(side) + string(trans)
+	nb := min(nbmax, impl.Ilaenv(1, "DORMLQ", opts, m, n, k, -1))
+	lworkopt := max(1, nw) * nb
+	if lwork == -1 {
+		work[0] = float64(lworkopt)
+		return
+	}
+	if left {
+		if lwork < n {
+			panic(badWork)
+		}
+	} else {
+		if lwork < m {
+			panic(badWork)
+		}
+	}
+
+	if m == 0 || n == 0 || k == 0 {
+		return
+	}
+	nbmin := 2
+
+	ldwork := nb
+	if nb > 1 && nb < k {
+		iws := nw * nb
+		if lwork < iws {
+			nb = lwork / nw
+			nbmin = max(2, impl.Ilaenv(2, "DORMLQ", opts, m, n, k, -1))
+		}
+	}
+	if nb < nbmin || nb >= k {
+		// Call unblocked code
+		impl.Dorml2(side, trans, m, n, k, a, lda, tau, c, ldc, work)
+		return
+	}
+	ldt := nb
+	t := make([]float64, nb*ldt)
+
+	transt := blas.NoTrans
+	if notran {
+		transt = blas.Trans
+	}
+
+	switch {
+	case left && notran:
+		for i := 0; i < k; i += nb {
+			ib := min(nb, k-i)
+			impl.Dlarft(lapack.Forward, lapack.RowWise, m-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				t, ldt)
+			impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m-i, n, ib,
+				a[i*lda+i:], lda,
+				t, ldt,
+				c[i*ldc:], ldc,
+				work, ldwork)
+		}
+		return
+	case left && !notran:
+		for i := ((k - 1) / nb) * nb; i >= 0; i -= nb {
+			ib := min(nb, k-i)
+			impl.Dlarft(lapack.Forward, lapack.RowWise, m-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				t, ldt)
+			impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m-i, n, ib,
+				a[i*lda+i:], lda,
+				t, ldt,
+				c[i*ldc:], ldc,
+				work, ldwork)
+		}
+		return
+	case !left && notran:
+		for i := ((k - 1) / nb) * nb; i >= 0; i -= nb {
+			ib := min(nb, k-i)
+			impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				t, ldt)
+			impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m, n-i, ib,
+				a[i*lda+i:], lda,
+				t, ldt,
+				c[i:], ldc,
+				work, ldwork)
+		}
+		return
+	case !left && !notran:
+		for i := 0; i < k; i += nb {
+			ib := min(nb, k-i)
+			impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				t, ldt)
+			impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m, n-i, ib,
+				a[i*lda+i:], lda,
+				t, ldt,
+				c[i:], ldc,
+				work, ldwork)
+		}
+		return
+	}
+}
diff --git a/native/dormqr.go b/native/dormqr.go
new file mode 100644
index 00000000..5d005f11
--- /dev/null
+++ b/native/dormqr.go
@@ -0,0 +1,139 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/lapack"
+)
+
+// Dormqr multiplies the matrix c by the othogonal matrix q defined by the
+// slices a and tau. A and tau are as returned from Dgeqrf.
+//  C = Q * C    if side == blas.Left and trans == blas.NoTrans
+//  C = Q^T * C  if side == blas.Left and trans == blas.Trans
+//  C = C * Q    if side == blas.Right and trans == blas.NoTrans
+//  C = C * Q^T  if side == blas.Right and trans == blas.Trans
+// If side == blas.Left, a is a matrix of side k×m, and if side == blas.Right
+// a is of size k×n. This uses a blocked algorithm.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= m if side == blas.Left and lwork >= n if side == blas.Right,
+// and this function will panic otherwise.
+// Dormqr uses a block algorithm, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dormqr,
+// the optimal work length will be stored into work[0].
+//
+// Tau contains the householder scales and must have length at least k, and
+// this function will panic otherwise.
+func (impl Implementation) Dormqr(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
+	left := side == blas.Left
+	notran := trans == blas.NoTrans
+	if left {
+		checkMatrix(m, k, a, lda)
+	} else {
+		checkMatrix(n, k, a, lda)
+	}
+	checkMatrix(m, n, c, ldc)
+
+	const nbmax = 64
+	nw := n
+	if side == blas.Right {
+		nw = m
+	}
+	opts := string(side) + string(trans)
+	nb := min(nbmax, impl.Ilaenv(1, "DORMQR", opts, m, n, k, -1))
+	lworkopt := max(1, nw) * nb
+	if lwork == -1 {
+		work[0] = float64(lworkopt)
+		return
+	}
+	if left {
+		if lwork < n {
+			panic(badWork)
+		}
+	} else {
+		if lwork < m {
+			panic(badWork)
+		}
+	}
+	if m == 0 || n == 0 || k == 0 {
+		return
+	}
+	nbmin := 2
+
+	ldwork := nb
+	if nb > 1 && nb < k {
+		iws := nw * nb
+		if lwork < iws {
+			nb = lwork / nw
+			nbmin = max(2, impl.Ilaenv(2, "DORMQR", opts, m, n, k, -1))
+		}
+	}
+	if nb < nbmin || nb >= k {
+		// Call unblocked code
+		impl.Dorm2r(side, trans, m, n, k, a, lda, tau, c, ldc, work)
+		return
+	}
+	ldt := nb
+	t := make([]float64, nb*ldt)
+	switch {
+	case left && notran:
+		for i := ((k - 1) / nb) * nb; i >= 0; i -= nb {
+			ib := min(nb, k-i)
+			impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				t, ldt)
+			impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m-i, n, ib,
+				a[i*lda+i:], lda,
+				t, ldt,
+				c[i*ldc:], ldc,
+				work, ldwork)
+		}
+		return
+	case left && !notran:
+		for i := 0; i < k; i += nb {
+			ib := min(nb, k-i)
+			impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				t, ldt)
+			impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m-i, n, ib,
+				a[i*lda+i:], lda,
+				t, ldt,
+				c[i*ldc:], ldc,
+				work, ldwork)
+		}
+		return
+	case !left && notran:
+		for i := 0; i < k; i += nb {
+			ib := min(nb, k-i)
+			impl.Dlarft(lapack.Forward, lapack.ColumnWise, n-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				t, ldt)
+			impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m, n-i, ib,
+				a[i*lda+i:], lda,
+				t, ldt,
+				c[i:], ldc,
+				work, ldwork)
+		}
+		return
+	case !left && !notran:
+		for i := ((k - 1) / nb) * nb; i >= 0; i -= nb {
+			ib := min(nb, k-i)
+			impl.Dlarft(lapack.Forward, lapack.ColumnWise, n-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				t, ldt)
+			impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m, n-i, ib,
+				a[i*lda+i:], lda,
+				t, ldt,
+				c[i:], ldc,
+				work, ldwork)
+		}
+		return
+	}
+}
diff --git a/native/dpotf2.go b/native/dpotf2.go
index fab262bf..06924538 100644
--- a/native/dpotf2.go
+++ b/native/dpotf2.go
@@ -43,7 +43,9 @@ func (Implementation) Dpotf2(ul blas.Uplo, n int, a []float64, lda int) (ok bool
 			ajj = math.Sqrt(ajj)
 			a[j*lda+j] = ajj
 			if j < n-1 {
-				bi.Dgemv(blas.Trans, j, n-j-1, -1, a[j+1:], lda, a[j:], lda, 1, a[j*lda+j+1:], 1)
+				bi.Dgemv(blas.Trans, j, n-j-1,
+					-1, a[j+1:], lda, a[j:], lda,
+					1, a[j*lda+j+1:], 1)
 				bi.Dscal(n-j-1, 1/ajj, a[j*lda+j+1:], 1)
 			}
 		}
@@ -61,7 +63,9 @@ func (Implementation) Dpotf2(ul blas.Uplo, n int, a []float64, lda int) (ok bool
 		ajj = math.Sqrt(ajj)
 		a[j*lda+j] = ajj
 		if j < n-1 {
-			bi.Dgemv(blas.NoTrans, n-j-1, j, -1, a[(j+1)*lda:], lda, a[j*lda:], 1, 1, a[(j+1)*lda+j:], lda)
+			bi.Dgemv(blas.NoTrans, n-j-1, j,
+				-1, a[(j+1)*lda:], lda, a[j*lda:], 1,
+				1, a[(j+1)*lda+j:], lda)
 			bi.Dscal(n-j-1, 1/ajj, a[(j+1)*lda+j:], lda)
 		}
 	}
diff --git a/native/dpotrf.go b/native/dpotrf.go
index 110c94bf..4062b902 100644
--- a/native/dpotrf.go
+++ b/native/dpotrf.go
@@ -28,39 +28,47 @@ func (impl Implementation) Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok
 	if n == 0 {
 		return true
 	}
-	nb := blockSize()
+	nb := impl.Ilaenv(1, "DPOTRF", string(ul), n, -1, -1, -1)
 	if n <= nb {
 		return impl.Dpotf2(ul, n, a, lda)
 	}
 	if ul == blas.Upper {
 		for j := 0; j < n; j += nb {
 			jb := min(nb, n-j)
-			bi.Dsyrk(blas.Upper, blas.Trans, jb, j, -1, a[j:], lda, 1, a[j*lda+j:], lda)
+			bi.Dsyrk(blas.Upper, blas.Trans, jb, j,
+				-1, a[j:], lda,
+				1, a[j*lda+j:], lda)
 			ok = impl.Dpotf2(blas.Upper, jb, a[j*lda+j:], lda)
 			if !ok {
 				return ok
 			}
 			if j+jb < n {
-				bi.Dgemm(blas.Trans, blas.NoTrans, jb, n-j-jb, j, -1,
-					a[j:], lda, a[j+jb:], lda, 1, a[j*lda+j+jb:], lda)
-				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, jb, n-j-jb, 1,
-					a[j*lda+j:], lda, a[j*lda+j+jb:], lda)
+				bi.Dgemm(blas.Trans, blas.NoTrans, jb, n-j-jb, j,
+					-1, a[j:], lda, a[j+jb:], lda,
+					1, a[j*lda+j+jb:], lda)
+				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, jb, n-j-jb,
+					1, a[j*lda+j:], lda,
+					a[j*lda+j+jb:], lda)
 			}
 		}
 		return true
 	}
 	for j := 0; j < n; j += nb {
 		jb := min(nb, n-j)
-		bi.Dsyrk(blas.Lower, blas.NoTrans, jb, j, -1, a[j*lda:], lda, 1, a[j*lda+j:], lda)
+		bi.Dsyrk(blas.Lower, blas.NoTrans, jb, j,
+			-1, a[j*lda:], lda,
+			1, a[j*lda+j:], lda)
 		ok := impl.Dpotf2(blas.Lower, jb, a[j*lda+j:], lda)
 		if !ok {
 			return ok
 		}
 		if j+jb < n {
-			bi.Dgemm(blas.NoTrans, blas.Trans, n-j-jb, jb, j, -1,
-				a[(j+jb)*lda:], lda, a[j*lda:], lda, 1, a[(j+jb)*lda+j:], lda)
-			bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, n-j-jb, jb, 1,
-				a[j*lda+j:], lda, a[(j+jb)*lda+j:], lda)
+			bi.Dgemm(blas.NoTrans, blas.Trans, n-j-jb, jb, j,
+				-1, a[(j+jb)*lda:], lda, a[j*lda:], lda,
+				1, a[(j+jb)*lda+j:], lda)
+			bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, n-j-jb, jb,
+				1, a[j*lda+j:], lda,
+				a[(j+jb)*lda+j:], lda)
 		}
 	}
 	return true
diff --git a/native/dtrtrs.go b/native/dtrtrs.go
new file mode 100644
index 00000000..d6cb8d2d
--- /dev/null
+++ b/native/dtrtrs.go
@@ -0,0 +1,31 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+)
+
+// Dtrtrs solves a triangular system of the form a * x = b or a^T * x = b. Dtrtrs
+// checks for singularity in a. If a is singular, false is returned and no solve
+// is performed. True is returned otherwise.
+func (impl Implementation) Dtrtrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, nrhs int, a []float64, lda int, b []float64, ldb int) (ok bool) {
+	nounit := diag == blas.NonUnit
+	if n == 0 {
+		return false
+	}
+	// Check for singularity.
+	if nounit {
+		for i := 0; i < n; i++ {
+			if a[i*lda+i] == 0 {
+				return false
+			}
+		}
+	}
+	bi := blas64.Implementation()
+	bi.Dtrsm(blas.Left, uplo, trans, diag, n, nrhs, 1, a, lda, b, ldb)
+	return true
+}
diff --git a/native/general.go b/native/general.go
index bee09601..e42b9a2f 100644
--- a/native/general.go
+++ b/native/general.go
@@ -4,20 +4,57 @@
 
 package native
 
-import "github.com/gonum/lapack"
+import (
+	"math"
 
+	"github.com/gonum/lapack"
+)
+
+// Implementation is the native Go implementation of LAPACK routines. It
+// is built on top of calls to the return of blas64.Implementation(), so while
+// this code is in pure Go, the underlying BLAS implementation may not be.
 type Implementation struct{}
 
 var _ lapack.Float64 = Implementation{}
 
 const (
-	badUplo = "lapack: illegal triangle"
-	nLT0    = "lapack: n < 0"
-	badLdA  = "lapack: index of a out of range"
+	badDirect     = "lapack: bad direct"
+	badLdA        = "lapack: index of a out of range"
+	badSide       = "lapack: bad side"
+	badStore      = "lapack: bad store"
+	badTau        = "lapack: tau has insufficient length"
+	badTrans      = "lapack: bad trans"
+	badUplo       = "lapack: illegal triangle"
+	badWork       = "lapack: insufficient working memory"
+	badWorkStride = "lapack: insufficient working array stride"
+	negDimension  = "lapack: negative matrix dimension"
+	nLT0          = "lapack: n < 0"
+	shortWork     = "lapack: working array shorter than declared"
 )
 
-func blockSize() int {
-	return 64
+// checkMatrix verifies the parameters of a matrix input.
+func checkMatrix(m, n int, a []float64, lda int) {
+	if m < 0 {
+		panic("lapack: has negative number of rows")
+	}
+	if m < 0 {
+		panic("lapack: has negative number of columns")
+	}
+	if lda < n {
+		panic("lapack: stride less than number of columns")
+	}
+	if len(a) < (m-1)*lda+n {
+		panic("lapack: insufficient matrix slice length")
+	}
+}
+
+func checkVector(n int, v []float64, inc int) {
+	if n < 0 {
+		panic("lapack: negative matrix length")
+	}
+	if (inc > 0 && (n-1)*inc >= len(v)) || (inc < 0 && (1-n)*inc >= len(v)) {
+		panic("lapack: insufficient vector slice length")
+	}
 }
 
 func min(a, b int) int {
@@ -33,3 +70,23 @@ func max(a, b int) int {
 	}
 	return b
 }
+
+// dlamch is a function in fortran, but since go forces IEEE-754 these are all
+// fixed values. Probably a way to get them as constants.
+// TODO(btracey): Is there a better way to find the smallest number such that 1+E > 1
+
+var dlamchE, dlamchS, dlamchP float64
+
+func init() {
+	onePlusEps := math.Nextafter(1, math.Inf(1))
+	eps := (math.Nextafter(1, math.Inf(1)) - 1) * 0.5
+	dlamchE = eps
+	sfmin := math.SmallestNonzeroFloat64
+	small := 1 / math.MaxFloat64
+	if small >= sfmin {
+		sfmin = small * onePlusEps
+	}
+	dlamchS = sfmin
+	radix := 2.0
+	dlamchP = radix * eps
+}
diff --git a/native/iladlc.go b/native/iladlc.go
index a66ac235..75b8caf0 100644
--- a/native/iladlc.go
+++ b/native/iladlc.go
@@ -7,9 +7,11 @@ package native
 // Iladlc scans a matrix for its last non-zero column. Returns -1 if the matrix
 // is all zeros.
 func (Implementation) Iladlc(m, n int, a []float64, lda int) int {
-	if n == 0 {
+	if n == 0 || m == 0 {
 		return n - 1
 	}
+	checkMatrix(m, n, a, lda)
+
 	// Test common case where corner is non-zero.
 	if a[n-1] != 0 || a[(m-1)*lda+(n-1)] != 0 {
 		return n - 1
diff --git a/native/iladlr.go b/native/iladlr.go
index 8ca8f81c..6862e158 100644
--- a/native/iladlr.go
+++ b/native/iladlr.go
@@ -10,6 +10,9 @@ func (Implementation) Iladlr(m, n int, a []float64, lda int) int {
 	if m == 0 {
 		return m - 1
 	}
+
+	checkMatrix(m, n, a, lda)
+
 	// Check the common case where the corner is non-zero
 	if a[(m-1)*lda] != 0 || a[(m-1)*lda+n-1] != 0 {
 		return m - 1
diff --git a/native/ilaenv.go b/native/ilaenv.go
new file mode 100644
index 00000000..b420dbf1
--- /dev/null
+++ b/native/ilaenv.go
@@ -0,0 +1,375 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+// Ilaenv returns algorithm tuning parameters for the algorithm given by the
+// input string. ispec specifies the parameter to return.
+//  1: The optimal block size
+//  2: The minimum block size for which the algorithm should be used.
+//  3: The crossover point below which an unblocked routine should be used.
+//  4: The number of shifts.
+//  5: The minumum column dimension for blocking to be used.
+//  6: The crossover point for SVD (to use QR factorization or not).
+//  7: The number of processors.
+//  8: The crossover point for multishift in QR and QZ methods for nonsymmetric eigenvalue problems.
+//  9: Maximum size of the subproblems in divide-and-conquer algorithms.
+//  10: ieee NaN arithmetic can be trusted not to trap.
+//  11: infinity arithmetic can be trusted not to trap.
+func (Implementation) Ilaenv(ispec int, s string, opts string, n1, n2, n3, n4 int) int {
+	// TODO(btracey): Replace this with a constant lookup? A list of constants?
+	// TODO: What is the difference between 2 and 3?
+	sname := s[0] == 'S' || s[0] == 'D'
+	cname := s[0] == 'C' || s[0] == 'Z'
+	if !sname && !cname {
+		panic("lapack: bad name")
+	}
+	c2 := s[1:3]
+	c3 := s[3:6]
+	c4 := c3[1:3]
+
+	switch ispec {
+	default:
+		panic("lapack: bad ispec")
+	case 1:
+		switch c2 {
+		default:
+			panic("lapack: bad function name")
+		case "GE":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "TRF":
+				if sname {
+					return 64
+				}
+				return 64
+			case "QRF", "RQF", "LQF", "QLF":
+				if sname {
+					return 32
+				}
+				return 32
+			case "HRD":
+				if sname {
+					return 32
+				}
+				return 32
+			case "BRD":
+				if sname {
+					return 32
+				}
+				return 32
+			case "TRI":
+				if sname {
+					return 64
+				}
+				return 64
+			}
+		case "PO":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "TRF":
+				if sname {
+					return 64
+				}
+				return 64
+			}
+		case "SY":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "TRF":
+				if sname {
+					return 64
+				}
+				return 64
+			case "TRD":
+				return 32
+			case "GST":
+				return 64
+			}
+		case "HE":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "TRF":
+				return 64
+			case "TRD":
+				return 32
+			case "GST":
+				return 64
+			}
+		case "OR":
+			switch c3[0] {
+			default:
+				panic("lapack: bad function name")
+			case 'G':
+				switch c3[1:] {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 32
+				}
+			case 'M':
+				switch c3[1:] {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 32
+				}
+			}
+		case "UN":
+			switch c3[0] {
+			default:
+				panic("lapack: bad function name")
+			case 'G':
+				switch c3[1:] {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 32
+				}
+			case 'M':
+				switch c3[1:] {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 32
+				}
+			}
+		case "GB":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "TRF":
+				if sname {
+					if n4 <= 64 {
+						return 1
+					}
+					return 32
+				}
+				if n4 <= 64 {
+					return 1
+				}
+				return 32
+			}
+		case "PB":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "TRF":
+				if sname {
+					if n4 <= 64 {
+						return 1
+					}
+					return 32
+				}
+				if n4 <= 64 {
+					return 1
+				}
+				return 32
+			}
+		case "TR":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "TRI":
+				if sname {
+					return 64
+				}
+				return 64
+			}
+		case "LA":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "UUM":
+				if sname {
+					return 64
+				}
+				return 64
+			}
+		case "ST":
+			if sname && c3 == "EBZ" {
+				return 1
+			}
+			panic("lapack: bad function name")
+		}
+	case 2:
+		switch c2 {
+		default:
+			panic("lapack: bad function name")
+		case "GE":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "QRF", "RQF", "LQF", "QLF":
+				if sname {
+					return 2
+				}
+				return 2
+			case "HRD":
+				if sname {
+					return 2
+				}
+				return 2
+			case "BRD":
+				if sname {
+					return 2
+				}
+				return 2
+			case "TRI":
+				if sname {
+					return 2
+				}
+				return 2
+			}
+		case "SY":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "TRF":
+				if sname {
+					return 8
+				}
+				return 8
+			case "TRD":
+				if sname {
+					return 2
+				}
+				panic("lapack: bad function name")
+			}
+		case "HE":
+			if c3 == "TRD" {
+				return 2
+			}
+			panic("lapack: bad function name")
+		case "OR":
+			if !sname {
+				panic("lapack: bad function name")
+			}
+			switch c3[0] {
+			default:
+				panic("lapack: bad function name")
+			case 'G':
+				switch c4 {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 2
+				}
+			case 'M':
+				switch c4 {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 2
+				}
+			}
+		case "UN":
+			switch c3[0] {
+			default:
+				panic("lapack: bad function name")
+			case 'G':
+				switch c4 {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 2
+				}
+			case 'M':
+				switch c4 {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 2
+				}
+			}
+		}
+	case 3:
+		switch c2 {
+		default:
+			panic("lapack: bad function name")
+		case "GE":
+			switch c3 {
+			default:
+				panic("lapack: bad function name")
+			case "QRF", "RQF", "LQF", "QLF":
+				if sname {
+					return 128
+				}
+				return 128
+			case "HRD":
+				if sname {
+					return 128
+				}
+				return 128
+			case "BRD":
+				if sname {
+					return 128
+				}
+				return 128
+			}
+		case "SY":
+			if sname && c3 == "TRD" {
+				return 32
+			}
+			panic("lapack: bad function name")
+		case "HE":
+			if c3 == "TRD" {
+				return 32
+			}
+			panic("lapack: bad function name")
+		case "OR":
+			switch c3[0] {
+			default:
+				panic("lapack: bad function name")
+			case 'G':
+				switch c4 {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 128
+				}
+			}
+		case "UN":
+			switch c3[0] {
+			default:
+				panic("lapack: bad function name")
+			case 'G':
+				switch c4 {
+				default:
+					panic("lapack: bad function name")
+				case "QR", "RQ", "LQ", "QL", "HR", "TR", "BR":
+					return 128
+				}
+			}
+		}
+	case 4:
+		// Used by xHSEQR
+		return 6
+	case 5:
+		// Not used
+		return 2
+	case 6:
+		// Used by xGELSS and xGESVD
+		return min(n1, n2) * 1e6
+	case 7:
+		// Not used
+		return 1
+	case 8:
+		// Used by xHSEQR
+		return 50
+	case 9:
+		// used by xGELSD and xGESDD
+		return 25
+	case 10:
+		// Go guarantees ieee
+		return 1
+	case 11:
+		// Go guarantees ieee
+		return 1
+	}
+}
diff --git a/native/lapack_test.go b/native/lapack_test.go
index e96a85c4..9addf64a 100644
--- a/native/lapack_test.go
+++ b/native/lapack_test.go
@@ -12,10 +12,62 @@ import (
 
 var impl = Implementation{}
 
+func TestDgelqf(t *testing.T) {
+	testlapack.DgelqfTest(t, impl)
+}
+
+func TestDgelq2(t *testing.T) {
+	testlapack.Dgelq2Test(t, impl)
+}
+
+func TestDgels(t *testing.T) {
+	testlapack.DgelsTest(t, impl)
+}
+
+func TestDgeqr2(t *testing.T) {
+	testlapack.Dgeqr2Test(t, impl)
+}
+
+func TestDgeqrf(t *testing.T) {
+	testlapack.DgeqrfTest(t, impl)
+}
+
+func TestDlange(t *testing.T) {
+	testlapack.DlangeTest(t, impl)
+}
+
+func TestDlarfb(t *testing.T) {
+	testlapack.DlarfbTest(t, impl)
+}
+
 func TestDlarf(t *testing.T) {
 	testlapack.DlarfTest(t, impl)
 }
 
+func TestDlarfg(t *testing.T) {
+	testlapack.DlarfgTest(t, impl)
+}
+
+func TestDlarft(t *testing.T) {
+	testlapack.DlarftTest(t, impl)
+}
+
+func TestDorml2(t *testing.T) {
+	testlapack.Dorml2Test(t, impl)
+}
+
+func TestDormlq(t *testing.T) {
+	testlapack.DormlqTest(t, impl)
+}
+
+func TestDormqr(t *testing.T) {
+	testlapack.DormqrTest(t, impl)
+}
+
+func TestDorm2r(t *testing.T) {
+	testlapack.Dorm2rTest(t, impl)
+}
+
 func TestDpotf2(t *testing.T) {
 	testlapack.Dpotf2Test(t, impl)
 }
diff --git a/testlapack/dgelq2.go b/testlapack/dgelq2.go
new file mode 100644
index 00000000..4ba0c2ac
--- /dev/null
+++ b/testlapack/dgelq2.go
@@ -0,0 +1,112 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math"
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/floats"
+)
+
+type Dgelq2er interface {
+	Dgelq2(m, n int, a []float64, lda int, tau, work []float64)
+}
+
+func Dgelq2Test(t *testing.T, impl Dgelq2er) {
+	for c, test := range []struct {
+		m, n, lda int
+	}{
+		{1, 1, 0},
+		{2, 2, 0},
+		{3, 2, 0},
+		{2, 3, 0},
+		{1, 12, 0},
+		{2, 6, 0},
+		{3, 4, 0},
+		{4, 3, 0},
+		{6, 2, 0},
+		{1, 12, 0},
+		{1, 1, 20},
+		{2, 2, 20},
+		{3, 2, 20},
+		{2, 3, 20},
+		{1, 12, 20},
+		{2, 6, 20},
+		{3, 4, 20},
+		{4, 3, 20},
+		{6, 2, 20},
+		{1, 12, 20},
+	} {
+		n := test.n
+		m := test.m
+		lda := test.lda
+		if lda == 0 {
+			lda = test.n
+		}
+		k := min(m, n)
+		tau := make([]float64, k)
+		for i := range tau {
+			tau[i] = rand.Float64()
+		}
+		work := make([]float64, m)
+		for i := range work {
+			work[i] = rand.Float64()
+		}
+		a := make([]float64, m*lda)
+		for i := 0; i < m*lda; i++ {
+			a[i] = rand.Float64()
+		}
+		aCopy := make([]float64, len(a))
+		copy(aCopy, a)
+		impl.Dgelq2(m, n, a, lda, tau, work)
+
+		Q := constructQ("LQ", m, n, a, lda, tau)
+
+		// Check that Q is orthonormal
+		for i := 0; i < Q.Rows; i++ {
+			nrm := blas64.Nrm2(Q.Cols, blas64.Vector{Inc: 1, Data: Q.Data[i*Q.Stride:]})
+			if math.Abs(nrm-1) > 1e-14 {
+				t.Errorf("Q not normal. Norm is %v", nrm)
+			}
+			for j := 0; j < i; j++ {
+				dot := blas64.Dot(Q.Rows,
+					blas64.Vector{Inc: 1, Data: Q.Data[i*Q.Stride:]},
+					blas64.Vector{Inc: 1, Data: Q.Data[j*Q.Stride:]},
+				)
+				if math.Abs(dot) > 1e-14 {
+					t.Errorf("Q not orthogonal. Dot is %v", dot)
+				}
+			}
+		}
+
+		L := blas64.General{
+			Rows:   m,
+			Cols:   n,
+			Stride: n,
+			Data:   make([]float64, m*n),
+		}
+		for i := 0; i < m; i++ {
+			for j := 0; j <= min(i, n-1); j++ {
+				L.Data[i*L.Stride+j] = a[i*lda+j]
+			}
+		}
+
+		ans := blas64.General{
+			Rows:   m,
+			Cols:   n,
+			Stride: lda,
+			Data:   make([]float64, m*lda),
+		}
+		copy(ans.Data, aCopy)
+		blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, L, Q, 0, ans)
+		if !floats.EqualApprox(aCopy, ans.Data, 1e-14) {
+			t.Errorf("Case %v, LQ mismatch. Want %v, got %v.", c, aCopy, ans.Data)
+		}
+	}
+}
diff --git a/testlapack/dgelqf.go b/testlapack/dgelqf.go
new file mode 100644
index 00000000..a5b6a2cb
--- /dev/null
+++ b/testlapack/dgelqf.go
@@ -0,0 +1,94 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/floats"
+)
+
+type Dgelqfer interface {
+	Dgelq2er
+	Dgelqf(m, n int, a []float64, lda int, tau, work []float64, lwork int)
+}
+
+func DgelqfTest(t *testing.T, impl Dgelqfer) {
+	for c, test := range []struct {
+		m, n, lda int
+	}{
+		{10, 5, 0},
+		{5, 10, 0},
+		{10, 10, 0},
+		{300, 5, 0},
+		{3, 500, 0},
+		{200, 200, 0},
+		{300, 200, 0},
+		{204, 300, 0},
+		{1, 3000, 0},
+		{3000, 1, 0},
+		{10, 5, 30},
+		{5, 10, 30},
+		{10, 10, 30},
+		{300, 5, 500},
+		{3, 500, 600},
+		{200, 200, 300},
+		{300, 200, 300},
+		{204, 300, 400},
+		{1, 3000, 4000},
+		{3000, 1, 4000},
+	} {
+		m := test.m
+		n := test.n
+		lda := test.lda
+		if lda == 0 {
+			lda = n
+		}
+		a := make([]float64, m*lda)
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				a[i*lda+j] = rand.Float64()
+			}
+		}
+		tau := make([]float64, n)
+		for i := 0; i < n; i++ {
+			tau[i] = rand.Float64()
+		}
+		aCopy := make([]float64, len(a))
+		copy(aCopy, a)
+		ans := make([]float64, len(a))
+		copy(ans, a)
+		work := make([]float64, m)
+		for i := range work {
+			work[i] = rand.Float64()
+		}
+		// Compute unblocked QR.
+		impl.Dgelq2(m, n, ans, lda, tau, work)
+		// Compute blocked QR with small work.
+		impl.Dgelqf(m, n, a, lda, tau, work, len(work))
+		if !floats.EqualApprox(ans, a, 1e-14) {
+			t.Errorf("Case %v, mismatch small work.", c)
+		}
+		// Try the full length of work.
+		impl.Dgelqf(m, n, a, lda, tau, work, -1)
+		lwork := int(work[0])
+		work = make([]float64, lwork)
+		copy(a, aCopy)
+		impl.Dgelqf(m, n, a, lda, tau, work, lwork)
+		if !floats.EqualApprox(ans, a, 1e-12) {
+			t.Errorf("Case %v, mismatch large work.", c)
+		}
+
+		// Try a slightly smaller version of work to test blocking code.
+		work = work[1:]
+		lwork--
+		copy(a, aCopy)
+		impl.Dgelqf(m, n, a, lda, tau, work, lwork)
+		if !floats.EqualApprox(ans, a, 1e-12) {
+			t.Errorf("Case %v, mismatch large work.", c)
+		}
+	}
+}
diff --git a/testlapack/dgels.go b/testlapack/dgels.go
new file mode 100644
index 00000000..808a6479
--- /dev/null
+++ b/testlapack/dgels.go
@@ -0,0 +1,181 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/floats"
+)
+
+type Dgelser interface {
+	Dgels(trans blas.Transpose, m, n, nrhs int, a []float64, lda int, b []float64, ldb int, work []float64, lwork int) bool
+}
+
+func DgelsTest(t *testing.T, impl Dgelser) {
+	for _, trans := range []blas.Transpose{blas.NoTrans, blas.Trans} {
+		for _, test := range []struct {
+			m, n, nrhs, lda, ldb int
+		}{
+			{3, 4, 5, 0, 0},
+			{3, 5, 4, 0, 0},
+			{4, 3, 5, 0, 0},
+			{4, 5, 3, 0, 0},
+			{5, 3, 4, 0, 0},
+			{5, 4, 3, 0, 0},
+			{3, 4, 5, 10, 20},
+			{3, 5, 4, 10, 20},
+			{4, 3, 5, 10, 20},
+			{4, 5, 3, 10, 20},
+			{5, 3, 4, 10, 20},
+			{5, 4, 3, 10, 20},
+			{3, 4, 5, 20, 10},
+			{3, 5, 4, 20, 10},
+			{4, 3, 5, 20, 10},
+			{4, 5, 3, 20, 10},
+			{5, 3, 4, 20, 10},
+			{5, 4, 3, 20, 10},
+			{200, 300, 400, 0, 0},
+			{200, 400, 300, 0, 0},
+			{300, 200, 400, 0, 0},
+			{300, 400, 200, 0, 0},
+			{400, 200, 300, 0, 0},
+			{400, 300, 200, 0, 0},
+			{200, 300, 400, 500, 600},
+			{200, 400, 300, 500, 600},
+			{300, 200, 400, 500, 600},
+			{300, 400, 200, 500, 600},
+			{400, 200, 300, 500, 600},
+			{400, 300, 200, 500, 600},
+			{200, 300, 400, 600, 500},
+			{200, 400, 300, 600, 500},
+			{300, 200, 400, 600, 500},
+			{300, 400, 200, 600, 500},
+			{400, 200, 300, 600, 500},
+			{400, 300, 200, 600, 500},
+		} {
+			m := test.m
+			n := test.n
+			nrhs := test.nrhs
+
+			lda := test.lda
+			if lda == 0 {
+				lda = n
+			}
+			a := make([]float64, m*lda)
+			for i := range a {
+				a[i] = rand.Float64()
+			}
+			aCopy := make([]float64, len(a))
+			copy(aCopy, a)
+
+			// Size of b is the same trans or no trans, because the number of rows
+			// has to be the max of (m,n).
+			mb := max(m, n)
+			nb := nrhs
+			ldb := test.ldb
+			if ldb == 0 {
+				ldb = nb
+			}
+			b := make([]float64, mb*ldb)
+			for i := range b {
+				b[i] = rand.Float64()
+			}
+			bCopy := make([]float64, len(b))
+			copy(bCopy, b)
+
+			// Find optimal work length.
+			work := make([]float64, 1)
+			impl.Dgels(trans, m, n, nrhs, a, lda, b, ldb, work, -1)
+
+			// Perform linear solve
+			work = make([]float64, int(work[0]))
+			lwork := len(work)
+			for i := range work {
+				work[i] = rand.Float64()
+			}
+			impl.Dgels(trans, m, n, nrhs, a, lda, b, ldb, work, lwork)
+
+			// Check that the answer is correct by comparing to the normal equations.
+			aMat := blas64.General{
+				Rows:   m,
+				Cols:   n,
+				Stride: lda,
+				Data:   make([]float64, len(aCopy)),
+			}
+			copy(aMat.Data, aCopy)
+			szAta := n
+			if trans == blas.Trans {
+				szAta = m
+			}
+			aTA := blas64.General{
+				Rows:   szAta,
+				Cols:   szAta,
+				Stride: szAta,
+				Data:   make([]float64, szAta*szAta),
+			}
+
+			// Compute A^T * A if notrans and A * A^T otherwise.
+			if trans == blas.NoTrans {
+				blas64.Gemm(blas.Trans, blas.NoTrans, 1, aMat, aMat, 0, aTA)
+			} else {
+				blas64.Gemm(blas.NoTrans, blas.Trans, 1, aMat, aMat, 0, aTA)
+			}
+
+			// Multiply by X.
+			X := blas64.General{
+				Rows:   szAta,
+				Cols:   nrhs,
+				Stride: ldb,
+				Data:   b,
+			}
+			ans := blas64.General{
+				Rows:   aTA.Rows,
+				Cols:   X.Cols,
+				Stride: X.Cols,
+				Data:   make([]float64, aTA.Rows*X.Cols),
+			}
+			blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, aTA, X, 0, ans)
+
+			B := blas64.General{
+				Rows:   szAta,
+				Cols:   nrhs,
+				Stride: ldb,
+				Data:   make([]float64, len(bCopy)),
+			}
+
+			copy(B.Data, bCopy)
+			var ans2 blas64.General
+			if trans == blas.NoTrans {
+				ans2 = blas64.General{
+					Rows:   aMat.Cols,
+					Cols:   B.Cols,
+					Stride: B.Cols,
+					Data:   make([]float64, aMat.Cols*B.Cols),
+				}
+			} else {
+				ans2 = blas64.General{
+					Rows:   aMat.Rows,
+					Cols:   B.Cols,
+					Stride: B.Cols,
+					Data:   make([]float64, aMat.Rows*B.Cols),
+				}
+			}
+
+			// Compute A^T B if Trans or A * B otherwise
+			if trans == blas.NoTrans {
+				blas64.Gemm(blas.Trans, blas.NoTrans, 1, aMat, B, 0, ans2)
+			} else {
+				blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, aMat, B, 0, ans2)
+			}
+			if !floats.EqualApprox(ans.Data, ans2.Data, 1e-12) {
+				t.Errorf("Normal equations not satisfied")
+			}
+		}
+	}
+}
diff --git a/testlapack/dgeqr2.go b/testlapack/dgeqr2.go
new file mode 100644
index 00000000..fdbdcdd0
--- /dev/null
+++ b/testlapack/dgeqr2.go
@@ -0,0 +1,110 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math"
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/floats"
+)
+
+type Dgeqr2er interface {
+	Dgeqr2(m, n int, a []float64, lda int, tau []float64, work []float64)
+}
+
+func Dgeqr2Test(t *testing.T, impl Dgeqr2er) {
+	for c, test := range []struct {
+		m, n, lda int
+	}{
+		{1, 1, 0},
+		{2, 2, 0},
+		{3, 2, 0},
+		{2, 3, 0},
+		{1, 12, 0},
+		{2, 6, 0},
+		{3, 4, 0},
+		{4, 3, 0},
+		{6, 2, 0},
+		{12, 1, 0},
+		{1, 1, 20},
+		{2, 2, 20},
+		{3, 2, 20},
+		{2, 3, 20},
+		{1, 12, 20},
+		{2, 6, 20},
+		{3, 4, 20},
+		{4, 3, 20},
+		{6, 2, 20},
+		{12, 1, 20},
+	} {
+		n := test.n
+		m := test.m
+		lda := test.lda
+		if lda == 0 {
+			lda = test.n
+		}
+		a := make([]float64, m*lda)
+		for i := range a {
+			a[i] = rand.Float64()
+		}
+		aCopy := make([]float64, len(a))
+		k := min(m, n)
+		tau := make([]float64, k)
+		for i := range tau {
+			tau[i] = rand.Float64()
+		}
+		work := make([]float64, n)
+		for i := range work {
+			work[i] = rand.Float64()
+		}
+		copy(aCopy, a)
+		impl.Dgeqr2(m, n, a, lda, tau, work)
+
+		// Test that the QR factorization has completed successfully. Compute
+		// Q based on the vectors.
+		q := constructQ("QR", m, n, a, lda, tau)
+
+		// Check that q is orthonormal
+		for i := 0; i < m; i++ {
+			nrm := blas64.Nrm2(m, blas64.Vector{1, q.Data[i*m:]})
+			if math.Abs(nrm-1) > 1e-14 {
+				t.Errorf("Case %v, q not normal", c)
+			}
+			for j := 0; j < i; j++ {
+				dot := blas64.Dot(m, blas64.Vector{1, q.Data[i*m:]}, blas64.Vector{1, q.Data[j*m:]})
+				if math.Abs(dot) > 1e-14 {
+					t.Errorf("Case %v, q not orthogonal", i)
+				}
+			}
+		}
+		// Check that A = Q * R
+		r := blas64.General{
+			Rows:   m,
+			Cols:   n,
+			Stride: n,
+			Data:   make([]float64, m*n),
+		}
+		for i := 0; i < m; i++ {
+			for j := i; j < n; j++ {
+				r.Data[i*n+j] = a[i*lda+j]
+			}
+		}
+		atmp := blas64.General{
+			Rows:   m,
+			Cols:   n,
+			Stride: lda,
+			Data:   make([]float64, m*lda),
+		}
+		copy(atmp.Data, a)
+		blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, q, r, 0, atmp)
+		if !floats.EqualApprox(atmp.Data, aCopy, 1e-14) {
+			t.Errorf("Q*R != a")
+		}
+	}
+}
diff --git a/testlapack/dgeqrf.go b/testlapack/dgeqrf.go
new file mode 100644
index 00000000..c5964150
--- /dev/null
+++ b/testlapack/dgeqrf.go
@@ -0,0 +1,91 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/floats"
+)
+
+type Dgeqrfer interface {
+	Dgeqr2er
+	Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int)
+}
+
+func DgeqrfTest(t *testing.T, impl Dgeqrfer) {
+	for c, test := range []struct {
+		m, n, lda int
+	}{
+		{10, 5, 0},
+		{5, 10, 0},
+		{10, 10, 0},
+		{300, 5, 0},
+		{3, 500, 0},
+		{200, 200, 0},
+		{300, 200, 0},
+		{204, 300, 0},
+		{1, 3000, 0},
+		{3000, 1, 0},
+		{10, 5, 20},
+		{5, 10, 20},
+		{10, 10, 20},
+		{300, 5, 400},
+		{3, 500, 600},
+		{200, 200, 300},
+		{300, 200, 300},
+		{204, 300, 400},
+		{1, 3000, 4000},
+		{3000, 1, 4000},
+	} {
+		m := test.m
+		n := test.n
+		lda := test.lda
+		if lda == 0 {
+			lda = test.n
+		}
+		a := make([]float64, m*lda)
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				a[i*lda+j] = rand.Float64()
+			}
+		}
+		tau := make([]float64, n)
+		for i := 0; i < n; i++ {
+			tau[i] = rand.Float64()
+		}
+		aCopy := make([]float64, len(a))
+		copy(aCopy, a)
+		ans := make([]float64, len(a))
+		copy(ans, a)
+		work := make([]float64, n)
+		// Compute unblocked QR.
+		impl.Dgeqr2(m, n, ans, lda, tau, work)
+		// Compute blocked QR with small work.
+		impl.Dgeqrf(m, n, a, lda, tau, work, len(work))
+		if !floats.EqualApprox(ans, a, 1e-14) {
+			t.Errorf("Case %v, mismatch small work.", c)
+		}
+		// Try the full length of work.
+		impl.Dgeqrf(m, n, a, lda, tau, work, -1)
+		lwork := int(work[0])
+		work = make([]float64, lwork)
+		copy(a, aCopy)
+		impl.Dgeqrf(m, n, a, lda, tau, work, lwork)
+		if !floats.EqualApprox(ans, a, 1e-12) {
+			t.Errorf("Case %v, mismatch large work.", c)
+		}
+
+		// Try a slightly smaller version of work to test blocking.
+		work = work[1:]
+		lwork--
+		copy(a, aCopy)
+		impl.Dgeqrf(m, n, a, lda, tau, work, lwork)
+		if !floats.EqualApprox(ans, a, 1e-12) {
+			t.Errorf("Case %v, mismatch large work.", c)
+		}
+	}
+}
diff --git a/testlapack/dlange.go b/testlapack/dlange.go
new file mode 100644
index 00000000..efbada0d
--- /dev/null
+++ b/testlapack/dlange.go
@@ -0,0 +1,92 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math"
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/lapack"
+)
+
+type Dlanger interface {
+	Dlange(norm lapack.MatrixNorm, m, n int, a []float64, lda int, work []float64) float64
+}
+
+func DlangeTest(t *testing.T, impl Dlanger) {
+	for _, test := range []struct {
+		m, n, lda int
+	}{
+		{4, 3, 0},
+		{3, 4, 0},
+		{4, 3, 100},
+		{3, 4, 100},
+	} {
+		m := test.m
+		n := test.n
+		lda := test.lda
+		if lda == 0 {
+			lda = n
+		}
+		a := make([]float64, m*lda)
+		for i := range a {
+			a[i] = (rand.Float64() - 0.5)
+		}
+		work := make([]float64, n)
+		for i := range work {
+			work[i] = rand.Float64()
+		}
+		aCopy := make([]float64, len(a))
+		copy(aCopy, a)
+
+		// Test MaxAbs norm.
+		norm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, work)
+		var ans float64
+		for i := 0; i < m; i++ {
+			idx := blas64.Iamax(n, blas64.Vector{1, aCopy[i*lda:]})
+			ans = math.Max(ans, math.Abs(a[i*lda+idx]))
+		}
+		// Should be strictly equal because there is no floating point summation error.
+		if ans != norm {
+			t.Errorf("MaxAbs mismatch. Want %v, got %v.", ans, norm)
+		}
+
+		// Test MaxColumnSum norm.
+		norm = impl.Dlange(lapack.MaxColumnSum, m, n, a, lda, work)
+		ans = 0
+		for i := 0; i < n; i++ {
+			sum := blas64.Asum(m, blas64.Vector{lda, aCopy[i:]})
+			ans = math.Max(ans, sum)
+		}
+		if math.Abs(norm-ans) > 1e-14 {
+			t.Errorf("MaxColumnSum mismatch. Want %v, got %v.", ans, norm)
+		}
+
+		// Test MaxRowSum norm.
+		norm = impl.Dlange(lapack.MaxRowSum, m, n, a, lda, work)
+		ans = 0
+		for i := 0; i < m; i++ {
+			sum := blas64.Asum(n, blas64.Vector{1, aCopy[i*lda:]})
+			ans = math.Max(ans, sum)
+		}
+		if math.Abs(norm-ans) > 1e-14 {
+			t.Errorf("MaxRowSum mismatch. Want %v, got %v.", ans, norm)
+		}
+
+		// Test Frobenius norm
+		norm = impl.Dlange(lapack.NormFrob, m, n, a, lda, work)
+		ans = 0
+		for i := 0; i < m; i++ {
+			sum := blas64.Nrm2(n, blas64.Vector{1, aCopy[i*lda:]})
+			ans += sum * sum
+		}
+		ans = math.Sqrt(ans)
+		if math.Abs(norm-ans) > 1e-14 {
+			t.Errorf("NormFrob mismatch. Want %v, got %v.", ans, norm)
+		}
+	}
+}
diff --git a/testlapack/dlarf.go b/testlapack/dlarf.go
index 2f4142bb..816ed9e4 100644
--- a/testlapack/dlarf.go
+++ b/testlapack/dlarf.go
@@ -63,6 +63,19 @@ func DlarfTest(t *testing.T, impl Dlarfer) {
 
 			tau: 2,
 		},
+		{
+			m:   2,
+			n:   3,
+			ldc: 3,
+
+			incv:  4,
+			lastv: 0,
+
+			lastr: 0,
+			lastc: 1,
+
+			tau: 2,
+		},
 		{
 			m:   10,
 			n:   10,
@@ -93,7 +106,7 @@ func DlarfTest(t *testing.T, impl Dlarfer) {
 		sz := max(test.m, test.n) // so v works for both right and left side.
 		v := make([]float64, test.incv*sz+1)
 		// Fill with nonzero entries up until lastv.
-		for i := 0; i < test.lastv; i++ {
+		for i := 0; i <= test.lastv; i++ {
 			v[i*test.incv] = rand.Float64()
 		}
 		// Construct h explicitly to compare.
@@ -132,7 +145,7 @@ func DlarfTest(t *testing.T, impl Dlarfer) {
 		work := make([]float64, sz)
 		impl.Dlarf(blas.Right, test.m, test.n, v, test.incv, test.tau, c, test.ldc, work)
 		if !floats.EqualApprox(c, cMat.Data, 1e-14) {
-			t.Errorf("Dlarf mismatch case %v. Want %v, got %v", i, cMat.Data, c)
+			t.Errorf("Dlarf mismatch right, case %v. Want %v, got %v", i, cMat.Data, c)
 		}
 
 		// Test on the left side.
@@ -153,7 +166,7 @@ func DlarfTest(t *testing.T, impl Dlarfer) {
 		blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, hMat, cMat2, 0, cMat)
 		impl.Dlarf(blas.Left, test.m, test.n, v, test.incv, test.tau, c, test.ldc, work)
 		if !floats.EqualApprox(c, cMat.Data, 1e-14) {
-			t.Errorf("Dlarf mismatch case %v. Want %v, got %v", i, cMat.Data, c)
+			t.Errorf("Dlarf mismatch left, case %v. Want %v, got %v", i, cMat.Data, c)
 		}
 	}
 }
diff --git a/testlapack/dlarfb.go b/testlapack/dlarfb.go
new file mode 100644
index 00000000..b23cf1ce
--- /dev/null
+++ b/testlapack/dlarfb.go
@@ -0,0 +1,160 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/floats"
+	"github.com/gonum/lapack"
+)
+
+type Dlarfber interface {
+	Dlarfter
+	Dlarfb(side blas.Side, trans blas.Transpose, direct lapack.Direct,
+		store lapack.StoreV, m, n, k int, v []float64, ldv int, t []float64, ldt int,
+		c []float64, ldc int, work []float64, ldwork int)
+}
+
+func DlarfbTest(t *testing.T, impl Dlarfber) {
+	for _, store := range []lapack.StoreV{lapack.ColumnWise, lapack.RowWise} {
+		for _, direct := range []lapack.Direct{lapack.Forward, lapack.Backward} {
+			for _, side := range []blas.Side{blas.Left, blas.Right} {
+				for _, trans := range []blas.Transpose{blas.Trans, blas.NoTrans} {
+					for cas, test := range []struct {
+						ma, na, cdim, lda, ldt, ldc int
+					}{
+						{6, 6, 6, 0, 0, 0},
+						{6, 8, 10, 0, 0, 0},
+						{6, 10, 8, 0, 0, 0},
+						{8, 6, 10, 0, 0, 0},
+						{8, 10, 6, 0, 0, 0},
+						{10, 6, 8, 0, 0, 0},
+						{10, 8, 6, 0, 0, 0},
+						{6, 6, 6, 12, 15, 30},
+						{6, 8, 10, 12, 15, 30},
+						{6, 10, 8, 12, 15, 30},
+						{8, 6, 10, 12, 15, 30},
+						{8, 10, 6, 12, 15, 30},
+						{10, 6, 8, 12, 15, 30},
+						{10, 8, 6, 12, 15, 30},
+						{6, 6, 6, 15, 12, 30},
+						{6, 8, 10, 15, 12, 30},
+						{6, 10, 8, 15, 12, 30},
+						{8, 6, 10, 15, 12, 30},
+						{8, 10, 6, 15, 12, 30},
+						{10, 6, 8, 15, 12, 30},
+						{10, 8, 6, 15, 12, 30},
+					} {
+						// Generate a matrix for QR
+						ma := test.ma
+						na := test.na
+						lda := test.lda
+						if lda == 0 {
+							lda = na
+						}
+						a := make([]float64, ma*lda)
+						for i := 0; i < ma; i++ {
+							for j := 0; j < lda; j++ {
+								a[i*lda+j] = rand.Float64()
+							}
+						}
+						k := min(ma, na)
+
+						// H is always ma x ma
+						var m, n, rowsWork int
+						switch {
+						default:
+							panic("not implemented")
+						case side == blas.Left:
+							m = test.ma
+							n = test.cdim
+							rowsWork = n
+						case side == blas.Right:
+							m = test.cdim
+							n = test.ma
+							rowsWork = m
+						}
+
+						// Use dgeqr2 to find the v vectors
+						tau := make([]float64, na)
+						work := make([]float64, na)
+						impl.Dgeqr2(ma, k, a, lda, tau, work)
+
+						// Correct the v vectors based on the direct and store
+						vMatTmp := extractVMat(ma, na, a, lda, lapack.Forward, lapack.ColumnWise)
+						vMat := constructVMat(vMatTmp, store, direct)
+						v := vMat.Data
+						ldv := vMat.Stride
+
+						// Use dlarft to find the t vector
+						ldt := test.ldt
+						if ldt == 0 {
+							ldt = k
+						}
+						tm := make([]float64, k*ldt)
+
+						impl.Dlarft(direct, store, ma, k, v, ldv, tau, tm, ldt)
+
+						// Generate c matrix
+						ldc := test.ldc
+						if ldc == 0 {
+							ldc = n
+						}
+						c := make([]float64, m*ldc)
+						for i := 0; i < m; i++ {
+							for j := 0; j < ldc; j++ {
+								c[i*ldc+j] = rand.Float64()
+							}
+						}
+						cCopy := make([]float64, len(c))
+						copy(cCopy, c)
+
+						ldwork := k
+						work = make([]float64, rowsWork*k)
+
+						// Call Dlarfb with this information
+						impl.Dlarfb(side, trans, direct, store, m, n, k, v, ldv, tm, ldt, c, ldc, work, ldwork)
+
+						h := constructH(tau, vMat, store, direct)
+
+						cMat := blas64.General{
+							Rows:   m,
+							Cols:   n,
+							Stride: ldc,
+							Data:   make([]float64, m*ldc),
+						}
+						copy(cMat.Data, cCopy)
+						ans := blas64.General{
+							Rows:   m,
+							Cols:   n,
+							Stride: ldc,
+							Data:   make([]float64, m*ldc),
+						}
+						copy(ans.Data, cMat.Data)
+						switch {
+						default:
+							panic("not implemented")
+						case side == blas.Left && trans == blas.NoTrans:
+							blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, h, cMat, 0, ans)
+						case side == blas.Left && trans == blas.Trans:
+							blas64.Gemm(blas.Trans, blas.NoTrans, 1, h, cMat, 0, ans)
+						case side == blas.Right && trans == blas.NoTrans:
+							blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, cMat, h, 0, ans)
+						case side == blas.Right && trans == blas.Trans:
+							blas64.Gemm(blas.NoTrans, blas.Trans, 1, cMat, h, 0, ans)
+						}
+						if !floats.EqualApprox(ans.Data, c, 1e-14) {
+							t.Errorf("Cas %v mismatch. Want %v, got %v.", cas, ans.Data, c)
+						}
+					}
+				}
+			}
+		}
+	}
+}
diff --git a/testlapack/dlarfg.go b/testlapack/dlarfg.go
new file mode 100644
index 00000000..2564998e
--- /dev/null
+++ b/testlapack/dlarfg.go
@@ -0,0 +1,133 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math"
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+)
+
+type Dlarfger interface {
+	Dlarfg(n int, alpha float64, x []float64, incX int) (beta, tau float64)
+}
+
+func DlarfgTest(t *testing.T, impl Dlarfger) {
+	for i, test := range []struct {
+		alpha float64
+		n     int
+		x     []float64
+	}{
+		{
+			alpha: 4,
+			n:     3,
+		},
+		{
+			alpha: -2,
+			n:     3,
+		},
+		{
+			alpha: 0,
+			n:     3,
+		},
+		{
+			alpha: 1,
+			n:     1,
+		},
+		{
+			alpha: 1,
+			n:     2,
+			x:     []float64{4, 5, 6},
+		},
+	} {
+		n := test.n
+		incX := 1
+		var x []float64
+		if test.x == nil {
+			x = make([]float64, n-1)
+			for i := range x {
+				x[i] = rand.Float64()
+			}
+		} else {
+			x = make([]float64, n-1)
+			copy(x, test.x)
+		}
+		xcopy := make([]float64, n-1)
+		copy(xcopy, x)
+		alpha := test.alpha
+		beta, tau := impl.Dlarfg(n, alpha, x, incX)
+
+		// Verify the returns and the values in v. Construct h and perform
+		// the explicit multiplication.
+		h := make([]float64, n*n)
+		for i := 0; i < n; i++ {
+			h[i*n+i] = 1
+		}
+		hmat := blas64.General{
+			Rows:   n,
+			Cols:   n,
+			Stride: n,
+			Data:   h,
+		}
+		v := make([]float64, n)
+		copy(v[1:], x)
+		v[0] = 1
+		vVec := blas64.Vector{
+			Inc:  1,
+			Data: v,
+		}
+		blas64.Ger(-tau, vVec, vVec, hmat)
+		eye := blas64.General{
+			Rows:   n,
+			Cols:   n,
+			Stride: n,
+			Data:   make([]float64, n*n),
+		}
+		blas64.Gemm(blas.Trans, blas.NoTrans, 1, hmat, hmat, 0, eye)
+		iseye := true
+		for i := 0; i < n; i++ {
+			for j := 0; j < n; j++ {
+				if i == j {
+					if math.Abs(eye.Data[i*n+j]-1) > 1e-14 {
+						iseye = false
+					}
+				} else {
+					if math.Abs(eye.Data[i*n+j]) > 1e-14 {
+						iseye = false
+					}
+				}
+			}
+		}
+		if !iseye {
+			t.Errorf("H^T * H is not I %V", eye)
+		}
+
+		xVec := blas64.Vector{
+			Inc:  1,
+			Data: make([]float64, n),
+		}
+		xVec.Data[0] = test.alpha
+		copy(xVec.Data[1:], xcopy)
+
+		ans := make([]float64, n)
+		ansVec := blas64.Vector{
+			Inc:  1,
+			Data: ans,
+		}
+		blas64.Gemv(blas.NoTrans, 1, hmat, xVec, 0, ansVec)
+		if math.Abs(ans[0]-beta) > 1e-14 {
+			t.Errorf("Case %v, beta mismatch. Want %v, got %v", i, ans[0], beta)
+		}
+		for i := 1; i < n; i++ {
+			if math.Abs(ans[i]) > 1e-14 {
+				t.Errorf("Case %v, nonzero answer %v", i, ans)
+				break
+			}
+		}
+	}
+}
diff --git a/testlapack/dlarft.go b/testlapack/dlarft.go
new file mode 100644
index 00000000..e26543c7
--- /dev/null
+++ b/testlapack/dlarft.go
@@ -0,0 +1,167 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/floats"
+	"github.com/gonum/lapack"
+)
+
+type Dlarfter interface {
+	Dgeqr2er
+	Dlarft(direct lapack.Direct, store lapack.StoreV, n, k int, v []float64, ldv int, tau []float64, t []float64, ldt int)
+}
+
+func DlarftTest(t *testing.T, impl Dlarfter) {
+	for _, store := range []lapack.StoreV{lapack.ColumnWise, lapack.RowWise} {
+		for _, direct := range []lapack.Direct{lapack.Forward, lapack.Backward} {
+			for _, test := range []struct {
+				m, n, ldv, ldt int
+			}{
+				{6, 6, 0, 0},
+				{8, 6, 0, 0},
+				{6, 8, 0, 0},
+				{6, 6, 10, 15},
+				{8, 6, 10, 15},
+				{6, 8, 10, 15},
+				{6, 6, 15, 10},
+				{8, 6, 15, 10},
+				{6, 8, 15, 10},
+			} {
+				// Generate a matrix
+				m := test.m
+				n := test.n
+				lda := n
+				if lda == 0 {
+					lda = n
+				}
+
+				a := make([]float64, m*lda)
+				for i := 0; i < m; i++ {
+					for j := 0; j < lda; j++ {
+						a[i*lda+j] = rand.Float64()
+					}
+				}
+				// Use dgeqr2 to find the v vectors
+				tau := make([]float64, n)
+				work := make([]float64, n)
+				impl.Dgeqr2(m, n, a, lda, tau, work)
+
+				// Construct H using these answers
+				vMatTmp := extractVMat(m, n, a, lda, lapack.Forward, lapack.ColumnWise)
+				vMat := constructVMat(vMatTmp, store, direct)
+				v := vMat.Data
+				ldv := vMat.Stride
+
+				h := constructH(tau, vMat, store, direct)
+
+				k := min(m, n)
+				ldt := test.ldt
+				if ldt == 0 {
+					ldt = k
+				}
+				// Find T from the actual function
+				tm := make([]float64, k*ldt)
+				for i := range tm {
+					tm[i] = 100 + rand.Float64()
+				}
+				// The v data has been put into a.
+				impl.Dlarft(direct, store, m, k, v, ldv, tau, tm, ldt)
+
+				tData := make([]float64, len(tm))
+				copy(tData, tm)
+				if direct == lapack.Forward {
+					// Zero out the lower traingular portion.
+					for i := 0; i < k; i++ {
+						for j := 0; j < i; j++ {
+							tData[i*ldt+j] = 0
+						}
+					}
+				} else {
+					// Zero out the upper traingular portion.
+					for i := 0; i < k; i++ {
+						for j := i + 1; j < k; j++ {
+							tData[i*ldt+j] = 0
+						}
+					}
+				}
+
+				T := blas64.General{
+					Rows:   k,
+					Cols:   k,
+					Stride: ldt,
+					Data:   tData,
+				}
+
+				vMatT := blas64.General{
+					Rows:   vMat.Cols,
+					Cols:   vMat.Rows,
+					Stride: vMat.Rows,
+					Data:   make([]float64, vMat.Cols*vMat.Rows),
+				}
+				for i := 0; i < vMat.Rows; i++ {
+					for j := 0; j < vMat.Cols; j++ {
+						vMatT.Data[j*vMatT.Stride+i] = vMat.Data[i*vMat.Stride+j]
+					}
+				}
+				var comp blas64.General
+				if store == lapack.ColumnWise {
+					// H = I - V * T * V^T
+					tmp := blas64.General{
+						Rows:   T.Rows,
+						Cols:   vMatT.Cols,
+						Stride: vMatT.Cols,
+						Data:   make([]float64, T.Rows*vMatT.Cols),
+					}
+					// T * V^T
+					blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, T, vMatT, 0, tmp)
+					comp = blas64.General{
+						Rows:   vMat.Rows,
+						Cols:   tmp.Cols,
+						Stride: tmp.Cols,
+						Data:   make([]float64, vMat.Rows*tmp.Cols),
+					}
+					// V * (T * V^T)
+					blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, vMat, tmp, 0, comp)
+				} else {
+					// H = I - V^T * T * V
+					tmp := blas64.General{
+						Rows:   T.Rows,
+						Cols:   vMat.Cols,
+						Stride: vMat.Cols,
+						Data:   make([]float64, T.Rows*vMat.Cols),
+					}
+					// T * V
+					blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, T, vMat, 0, tmp)
+					comp = blas64.General{
+						Rows:   vMatT.Rows,
+						Cols:   tmp.Cols,
+						Stride: tmp.Cols,
+						Data:   make([]float64, vMatT.Rows*tmp.Cols),
+					}
+					// V^T * (T * V)
+					blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, vMatT, tmp, 0, comp)
+				}
+				// I - V^T * T * V
+				for i := 0; i < comp.Rows; i++ {
+					for j := 0; j < comp.Cols; j++ {
+						comp.Data[i*m+j] *= -1
+						if i == j {
+							comp.Data[i*m+j] += 1
+						}
+					}
+				}
+				if !floats.EqualApprox(comp.Data, h.Data, 1e-14) {
+					t.Errorf("T does not construct proper H. Store = %v, Direct = %v.\nWant %v\ngot %v.", string(store), string(direct), h.Data, comp.Data)
+				}
+			}
+		}
+	}
+}
diff --git a/testlapack/dorm2r.go b/testlapack/dorm2r.go
new file mode 100644
index 00000000..c409c78b
--- /dev/null
+++ b/testlapack/dorm2r.go
@@ -0,0 +1,138 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/floats"
+)
+
+type Dorm2rer interface {
+	Dgeqrfer
+	Dorm2r(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64)
+}
+
+func Dorm2rTest(t *testing.T, impl Dorm2rer) {
+	for _, side := range []blas.Side{blas.Left, blas.Right} {
+		for _, trans := range []blas.Transpose{blas.NoTrans, blas.Trans} {
+			for _, test := range []struct {
+				common, adim, cdim, lda, ldc int
+			}{
+				{3, 4, 5, 0, 0},
+				{3, 5, 4, 0, 0},
+				{4, 3, 5, 0, 0},
+				{4, 5, 3, 0, 0},
+				{5, 3, 4, 0, 0},
+				{5, 4, 3, 0, 0},
+				{3, 4, 5, 6, 20},
+				{3, 5, 4, 6, 20},
+				{4, 3, 5, 6, 20},
+				{4, 5, 3, 6, 20},
+				{5, 3, 4, 6, 20},
+				{5, 4, 3, 6, 20},
+				{3, 4, 5, 20, 6},
+				{3, 5, 4, 20, 6},
+				{4, 3, 5, 20, 6},
+				{4, 5, 3, 20, 6},
+				{5, 3, 4, 20, 6},
+				{5, 4, 3, 20, 6},
+			} {
+				var ma, na, mc, nc int
+				if side == blas.Left {
+					ma = test.common
+					na = test.adim
+					mc = test.common
+					nc = test.cdim
+				} else {
+					ma = test.common
+					na = test.adim
+					mc = test.cdim
+					nc = test.common
+				}
+
+				// Generate a random matrix
+				lda := test.lda
+				if lda == 0 {
+					lda = na
+				}
+				a := make([]float64, ma*lda)
+				for i := range a {
+					a[i] = rand.Float64()
+				}
+				ldc := test.ldc
+				if ldc == 0 {
+					ldc = nc
+				}
+				// Compute random C matrix
+				c := make([]float64, mc*ldc)
+				for i := range c {
+					c[i] = rand.Float64()
+				}
+
+				// Compute QR
+				k := min(ma, na)
+				tau := make([]float64, k)
+				work := make([]float64, 1)
+				impl.Dgeqrf(ma, na, a, lda, tau, work, -1)
+				work = make([]float64, int(work[0]))
+				impl.Dgeqrf(ma, na, a, lda, tau, work, len(work))
+
+				// Build Q from result
+				q := constructQ("QR", ma, na, a, lda, tau)
+
+				cMat := blas64.General{
+					Rows:   mc,
+					Cols:   nc,
+					Stride: ldc,
+					Data:   make([]float64, len(c)),
+				}
+				copy(cMat.Data, c)
+				cMatCopy := blas64.General{
+					Rows:   cMat.Rows,
+					Cols:   cMat.Cols,
+					Stride: cMat.Stride,
+					Data:   make([]float64, len(cMat.Data)),
+				}
+				copy(cMatCopy.Data, cMat.Data)
+				switch {
+				default:
+					panic("bad test")
+				case side == blas.Left && trans == blas.NoTrans:
+					blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, q, cMatCopy, 0, cMat)
+				case side == blas.Left && trans == blas.Trans:
+					blas64.Gemm(blas.Trans, blas.NoTrans, 1, q, cMatCopy, 0, cMat)
+				case side == blas.Right && trans == blas.NoTrans:
+					blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, cMatCopy, q, 0, cMat)
+				case side == blas.Right && trans == blas.Trans:
+					blas64.Gemm(blas.NoTrans, blas.Trans, 1, cMatCopy, q, 0, cMat)
+				}
+				// Do Dorm2r ard compare
+				if side == blas.Left {
+					work = make([]float64, nc)
+				} else {
+					work = make([]float64, mc)
+				}
+				aCopy := make([]float64, len(a))
+				copy(aCopy, a)
+				tauCopy := make([]float64, len(tau))
+				copy(tauCopy, tau)
+				impl.Dorm2r(side, trans, mc, nc, k, a, lda, tau, c, ldc, work)
+				if !floats.Equal(a, aCopy) {
+					t.Errorf("a changed in call")
+				}
+				if !floats.Equal(tau, tauCopy) {
+					t.Errorf("tau changed in call")
+				}
+				if !floats.EqualApprox(cMat.Data, c, 1e-14) {
+					t.Errorf("Multiplication mismatch.\n Want %v \n got %v.", cMat.Data, c)
+				}
+			}
+		}
+	}
+}
diff --git a/testlapack/dorml2.go b/testlapack/dorml2.go
new file mode 100644
index 00000000..5e87becf
--- /dev/null
+++ b/testlapack/dorml2.go
@@ -0,0 +1,137 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/floats"
+)
+
+type Dorml2er interface {
+	Dgelqfer
+	Dorml2(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64)
+}
+
+func Dorml2Test(t *testing.T, impl Dorml2er) {
+	for _, side := range []blas.Side{blas.Left, blas.Right} {
+		for _, trans := range []blas.Transpose{blas.NoTrans, blas.Trans} {
+			for _, test := range []struct {
+				common, adim, cdim, lda, ldc int
+			}{
+				{3, 4, 5, 0, 0},
+				{3, 5, 4, 0, 0},
+				{4, 3, 5, 0, 0},
+				{4, 5, 3, 0, 0},
+				{5, 3, 4, 0, 0},
+				{5, 4, 3, 0, 0},
+				{3, 4, 5, 6, 20},
+				{3, 5, 4, 6, 20},
+				{4, 3, 5, 6, 20},
+				{4, 5, 3, 6, 20},
+				{5, 3, 4, 6, 20},
+				{5, 4, 3, 6, 20},
+				{3, 4, 5, 20, 6},
+				{3, 5, 4, 20, 6},
+				{4, 3, 5, 20, 6},
+				{4, 5, 3, 20, 6},
+				{5, 3, 4, 20, 6},
+				{5, 4, 3, 20, 6},
+			} {
+				var ma, na, mc, nc int
+				if side == blas.Left {
+					ma = test.adim
+					na = test.common
+					mc = test.common
+					nc = test.cdim
+				} else {
+					ma = test.adim
+					na = test.common
+					mc = test.cdim
+					nc = test.common
+				}
+				// Generate a random matrix
+				lda := test.lda
+				if lda == 0 {
+					lda = na
+				}
+				a := make([]float64, ma*lda)
+				for i := range a {
+					a[i] = rand.Float64()
+				}
+				ldc := test.ldc
+				if ldc == 0 {
+					ldc = nc
+				}
+				// Compute random C matrix
+				c := make([]float64, mc*ldc)
+				for i := range c {
+					c[i] = rand.Float64()
+				}
+
+				// Compute LQ
+				k := min(ma, na)
+				tau := make([]float64, k)
+				work := make([]float64, 1)
+				impl.Dgelqf(ma, na, a, lda, tau, work, -1)
+				work = make([]float64, int(work[0]))
+				impl.Dgelqf(ma, na, a, lda, tau, work, len(work))
+
+				// Build Q from result
+				q := constructQ("LQ", ma, na, a, lda, tau)
+
+				cMat := blas64.General{
+					Rows:   mc,
+					Cols:   nc,
+					Stride: ldc,
+					Data:   make([]float64, len(c)),
+				}
+				copy(cMat.Data, c)
+				cMatCopy := blas64.General{
+					Rows:   cMat.Rows,
+					Cols:   cMat.Cols,
+					Stride: cMat.Stride,
+					Data:   make([]float64, len(cMat.Data)),
+				}
+				copy(cMatCopy.Data, cMat.Data)
+				switch {
+				default:
+					panic("bad test")
+				case side == blas.Left && trans == blas.NoTrans:
+					blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, q, cMatCopy, 0, cMat)
+				case side == blas.Left && trans == blas.Trans:
+					blas64.Gemm(blas.Trans, blas.NoTrans, 1, q, cMatCopy, 0, cMat)
+				case side == blas.Right && trans == blas.NoTrans:
+					blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, cMatCopy, q, 0, cMat)
+				case side == blas.Right && trans == blas.Trans:
+					blas64.Gemm(blas.NoTrans, blas.Trans, 1, cMatCopy, q, 0, cMat)
+				}
+				// Do Dorm2r ard compare
+				if side == blas.Left {
+					work = make([]float64, nc)
+				} else {
+					work = make([]float64, mc)
+				}
+				aCopy := make([]float64, len(a))
+				copy(aCopy, a)
+				tauCopy := make([]float64, len(tau))
+				copy(tauCopy, tau)
+				impl.Dorml2(side, trans, mc, nc, k, a, lda, tau, c, ldc, work)
+				if !floats.Equal(a, aCopy) {
+					t.Errorf("a changed in call")
+				}
+				if !floats.Equal(tau, tauCopy) {
+					t.Errorf("tau changed in call")
+				}
+				if !floats.EqualApprox(cMat.Data, c, 1e-14) {
+					t.Errorf("Multiplication mismatch.\n Want %v \n got %v.", cMat.Data, c)
+				}
+			}
+		}
+	}
+}
diff --git a/testlapack/dormlq.go b/testlapack/dormlq.go
new file mode 100644
index 00000000..437c9651
--- /dev/null
+++ b/testlapack/dormlq.go
@@ -0,0 +1,149 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"fmt"
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/floats"
+)
+
+type Dormlqer interface {
+	Dorml2er
+	Dormlq(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int)
+}
+
+func DormlqTest(t *testing.T, impl Dormlqer) {
+	for _, side := range []blas.Side{blas.Left, blas.Right} {
+		for _, trans := range []blas.Transpose{blas.NoTrans, blas.Trans} {
+			for _, test := range []struct {
+				common, adim, cdim, lda, ldc int
+			}{
+				{6, 7, 8, 0, 0},
+				{6, 8, 7, 0, 0},
+				{7, 6, 8, 0, 0},
+				{7, 8, 6, 0, 0},
+				{8, 6, 7, 0, 0},
+				{8, 7, 6, 0, 0},
+				{100, 200, 300, 0, 0},
+				{100, 300, 200, 0, 0},
+				{200, 100, 300, 0, 0},
+				{200, 300, 100, 0, 0},
+				{300, 100, 200, 0, 0},
+				{300, 200, 100, 0, 0},
+				{100, 200, 300, 400, 500},
+				{100, 300, 200, 400, 500},
+				{200, 100, 300, 400, 500},
+				{200, 300, 100, 400, 500},
+				{300, 100, 200, 400, 500},
+				{300, 200, 100, 400, 500},
+				{100, 200, 300, 500, 400},
+				{100, 300, 200, 500, 400},
+				{200, 100, 300, 500, 400},
+				{200, 300, 100, 500, 400},
+				{300, 100, 200, 500, 400},
+				{300, 200, 100, 500, 400},
+			} {
+				var ma, na, mc, nc int
+				if side == blas.Left {
+					ma = test.adim
+					na = test.common
+					mc = test.common
+					nc = test.cdim
+				} else {
+					ma = test.adim
+					na = test.common
+					mc = test.cdim
+					nc = test.common
+				}
+				// Generate a random matrix
+				lda := test.lda
+				if lda == 0 {
+					lda = na
+				}
+				a := make([]float64, ma*lda)
+				for i := range a {
+					a[i] = rand.Float64()
+				}
+				// Compute random C matrix
+				ldc := test.ldc
+				if ldc == 0 {
+					ldc = nc
+				}
+				c := make([]float64, mc*ldc)
+				for i := range c {
+					c[i] = rand.Float64()
+				}
+
+				// Compute LQ
+				k := min(ma, na)
+				tau := make([]float64, k)
+				work := make([]float64, 1)
+				impl.Dgelqf(ma, na, a, lda, tau, work, -1)
+				work = make([]float64, int(work[0]))
+				impl.Dgelqf(ma, na, a, lda, tau, work, len(work))
+
+				cCopy := make([]float64, len(c))
+				copy(cCopy, c)
+				ans := make([]float64, len(c))
+				copy(ans, cCopy)
+
+				if side == blas.Left {
+					work = make([]float64, nc)
+				} else {
+					work = make([]float64, mc)
+				}
+				impl.Dorml2(side, trans, mc, nc, k, a, lda, tau, ans, ldc, work)
+
+				// Make sure Dorml2 and Dormlq match with small work
+				for i := range work {
+					work[i] = rand.Float64()
+				}
+				lwork := len(work)
+				copy(c, cCopy)
+				impl.Dormlq(side, trans, mc, nc, k, a, lda, tau, c, ldc, work, lwork)
+				if !floats.EqualApprox(c, ans, 1e-12) {
+					t.Errorf("Dormqr and Dorm2r mismatch for small work")
+				}
+
+				// Try with the optimum amount of work
+				copy(c, cCopy)
+				impl.Dormlq(side, trans, mc, nc, k, a, lda, tau, c, ldc, work, -1)
+				work = make([]float64, int(work[0]))
+				lwork = len(work)
+				for i := range work {
+					work[i] = rand.Float64()
+				}
+				impl.Dormlq(side, trans, mc, nc, k, a, lda, tau, c, ldc, work, lwork)
+				if !floats.EqualApprox(c, ans, 1e-12) {
+					t.Errorf("Dormqr and Dorm2r mismatch for full work")
+					fmt.Println("ccopy")
+					for i := 0; i < mc; i++ {
+						fmt.Println(cCopy[i*ldc : (i+1)*ldc])
+					}
+					fmt.Println("ans =")
+					for i := 0; i < mc; i++ {
+						fmt.Println(ans[i*ldc : (i+1)*ldc])
+					}
+					fmt.Println("c =")
+					for i := 0; i < mc; i++ {
+						fmt.Println(c[i*ldc : (i+1)*ldc])
+					}
+				}
+				// Try with less than the optimum amount of work
+				copy(c, cCopy)
+				work = work[1:]
+				lwork--
+				impl.Dormlq(side, trans, mc, nc, k, a, lda, tau, c, ldc, work, lwork)
+				if !floats.EqualApprox(c, ans, 1e-12) {
+					t.Errorf("Dormqr and Dorm2r mismatch for medium work")
+				}
+			}
+		}
+	}
+}
diff --git a/testlapack/dormqr.go b/testlapack/dormqr.go
new file mode 100644
index 00000000..f12991f5
--- /dev/null
+++ b/testlapack/dormqr.go
@@ -0,0 +1,150 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"fmt"
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/floats"
+)
+
+type Dormqrer interface {
+	Dorm2rer
+	Dormqr(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int)
+}
+
+func DormqrTest(t *testing.T, impl Dormqrer) {
+	for _, side := range []blas.Side{blas.Left, blas.Right} {
+		for _, trans := range []blas.Transpose{blas.NoTrans, blas.Trans} {
+			for _, test := range []struct {
+				common, adim, cdim, lda, ldc int
+			}{
+				{6, 7, 8, 0, 0},
+				{6, 8, 7, 0, 0},
+				{7, 6, 8, 0, 0},
+				{7, 8, 6, 0, 0},
+				{8, 6, 7, 0, 0},
+				{8, 7, 6, 0, 0},
+				{100, 200, 300, 0, 0},
+				{100, 300, 200, 0, 0},
+				{200, 100, 300, 0, 0},
+				{200, 300, 100, 0, 0},
+				{300, 100, 200, 0, 0},
+				{300, 200, 100, 0, 0},
+				{100, 200, 300, 400, 500},
+				{100, 300, 200, 400, 500},
+				{200, 100, 300, 400, 500},
+				{200, 300, 100, 400, 500},
+				{300, 100, 200, 400, 500},
+				{300, 200, 100, 400, 500},
+				{100, 200, 300, 500, 400},
+				{100, 300, 200, 500, 400},
+				{200, 100, 300, 500, 400},
+				{200, 300, 100, 500, 400},
+				{300, 100, 200, 500, 400},
+				{300, 200, 100, 500, 400},
+			} {
+				var ma, na, mc, nc int
+				if side == blas.Left {
+					ma = test.common
+					na = test.adim
+					mc = test.common
+					nc = test.cdim
+				} else {
+					ma = test.common
+					na = test.adim
+					mc = test.cdim
+					nc = test.common
+				}
+				// Generate a random matrix
+				lda := test.lda
+				if lda == 0 {
+					lda = na
+				}
+				a := make([]float64, ma*lda)
+				for i := range a {
+					a[i] = rand.Float64()
+				}
+				// Compute random C matrix
+				ldc := test.ldc
+				if ldc == 0 {
+					ldc = nc
+				}
+				c := make([]float64, mc*ldc)
+				for i := range c {
+					c[i] = rand.Float64()
+				}
+
+				// Compute QR
+				k := min(ma, na)
+				tau := make([]float64, k)
+				work := make([]float64, 1)
+				impl.Dgeqrf(ma, na, a, lda, tau, work, -1)
+				work = make([]float64, int(work[0]))
+				impl.Dgeqrf(ma, na, a, lda, tau, work, len(work))
+
+				cCopy := make([]float64, len(c))
+				copy(cCopy, c)
+				ans := make([]float64, len(c))
+				copy(ans, cCopy)
+
+				if side == blas.Left {
+					work = make([]float64, nc)
+				} else {
+					work = make([]float64, mc)
+				}
+				impl.Dorm2r(side, trans, mc, nc, k, a, lda, tau, ans, ldc, work)
+
+				// Make sure Dorm2r and Dormqr match with small work
+				for i := range work {
+					work[i] = rand.Float64()
+				}
+				lwork := len(work)
+				copy(c, cCopy)
+				impl.Dormqr(side, trans, mc, nc, k, a, lda, tau, c, ldc, work, lwork)
+				if !floats.EqualApprox(c, ans, 1e-12) {
+					t.Errorf("Dormqr and Dorm2r mismatch for small work")
+				}
+
+				// Try with the optimum amount of work
+				copy(c, cCopy)
+				impl.Dormqr(side, trans, mc, nc, k, a, lda, tau, c, ldc, work, -1)
+				work = make([]float64, int(work[0]))
+				lwork = len(work)
+				for i := range work {
+					work[i] = rand.Float64()
+				}
+				_ = lwork
+				impl.Dormqr(side, trans, mc, nc, k, a, lda, tau, c, ldc, work, lwork)
+				if !floats.EqualApprox(c, ans, 1e-12) {
+					t.Errorf("Dormqr and Dorm2r mismatch for full work")
+					fmt.Println("ccopy")
+					for i := 0; i < mc; i++ {
+						fmt.Println(cCopy[i*ldc : (i+1)*ldc])
+					}
+					fmt.Println("ans =")
+					for i := 0; i < mc; i++ {
+						fmt.Println(ans[i*ldc : (i+1)*ldc])
+					}
+					fmt.Println("c =")
+					for i := 0; i < mc; i++ {
+						fmt.Println(c[i*ldc : (i+1)*ldc])
+					}
+				}
+				// Try with less than the optimum amount of work
+				copy(c, cCopy)
+				work = work[1:]
+				lwork--
+				impl.Dormqr(side, trans, mc, nc, k, a, lda, tau, c, ldc, work, lwork)
+				if !floats.EqualApprox(c, ans, 1e-12) {
+					t.Errorf("Dormqr and Dorm2r mismatch for medium work")
+				}
+			}
+		}
+	}
+}
diff --git a/testlapack/general.go b/testlapack/general.go
index d5e13073..53b3947f 100644
--- a/testlapack/general.go
+++ b/testlapack/general.go
@@ -4,9 +4,289 @@
 
 package testlapack
 
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/lapack"
+)
+
 func max(a, b int) int {
 	if a > b {
 		return a
 	}
 	return b
 }
+
+func min(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+// extractVMat collects the single reflectors from a into a matrix.
+func extractVMat(m, n int, a []float64, lda int, direct lapack.Direct, store lapack.StoreV) blas64.General {
+	k := min(m, n)
+	switch {
+	default:
+		panic("not implemented")
+	case direct == lapack.Forward && store == lapack.ColumnWise:
+		v := blas64.General{
+			Rows:   m,
+			Cols:   k,
+			Stride: k,
+			Data:   make([]float64, m*k),
+		}
+		for i := 0; i < k; i++ {
+			for j := 0; j < i; j++ {
+				v.Data[j*v.Stride+i] = 0
+			}
+			v.Data[i*v.Stride+i] = 1
+			for j := i + 1; j < m; j++ {
+				v.Data[j*v.Stride+i] = a[j*lda+i]
+			}
+		}
+		return v
+	case direct == lapack.Forward && store == lapack.RowWise:
+		v := blas64.General{
+			Rows:   k,
+			Cols:   n,
+			Stride: n,
+			Data:   make([]float64, k*n),
+		}
+		for i := 0; i < k; i++ {
+			for j := 0; j < i; j++ {
+				v.Data[i*v.Stride+j] = 0
+			}
+			v.Data[i*v.Stride+i] = 1
+			for j := i + 1; j < n; j++ {
+				v.Data[i*v.Stride+j] = a[i*lda+j]
+			}
+		}
+		return v
+	}
+}
+
+// constructVMat transforms the v matrix based on the storage.
+func constructVMat(vMat blas64.General, store lapack.StoreV, direct lapack.Direct) blas64.General {
+	m := vMat.Rows
+	k := vMat.Cols
+	switch {
+	default:
+		panic("not implemented")
+	case store == lapack.ColumnWise && direct == lapack.Forward:
+		ldv := k
+		v := make([]float64, m*k)
+		for i := 0; i < m; i++ {
+			for j := 0; j < k; j++ {
+				if j > i {
+					v[i*ldv+j] = 0
+				} else if j == i {
+					v[i*ldv+i] = 1
+				} else {
+					v[i*ldv+j] = vMat.Data[i*vMat.Stride+j]
+				}
+			}
+		}
+		return blas64.General{
+			Rows:   m,
+			Cols:   k,
+			Stride: k,
+			Data:   v,
+		}
+	case store == lapack.RowWise && direct == lapack.Forward:
+		ldv := m
+		v := make([]float64, m*k)
+		for i := 0; i < m; i++ {
+			for j := 0; j < k; j++ {
+				if j > i {
+					v[j*ldv+i] = 0
+				} else if j == i {
+					v[j*ldv+i] = 1
+				} else {
+					v[j*ldv+i] = vMat.Data[i*vMat.Stride+j]
+				}
+			}
+		}
+		return blas64.General{
+			Rows:   k,
+			Cols:   m,
+			Stride: m,
+			Data:   v,
+		}
+	case store == lapack.ColumnWise && direct == lapack.Backward:
+		rowsv := m
+		ldv := k
+		v := make([]float64, m*k)
+		for i := 0; i < m; i++ {
+			for j := 0; j < k; j++ {
+				vrow := rowsv - i - 1
+				vcol := k - j - 1
+				if j > i {
+					v[vrow*ldv+vcol] = 0
+				} else if j == i {
+					v[vrow*ldv+vcol] = 1
+				} else {
+					v[vrow*ldv+vcol] = vMat.Data[i*vMat.Stride+j]
+				}
+			}
+		}
+		return blas64.General{
+			Rows:   rowsv,
+			Cols:   ldv,
+			Stride: ldv,
+			Data:   v,
+		}
+	case store == lapack.RowWise && direct == lapack.Backward:
+		rowsv := k
+		ldv := m
+		v := make([]float64, m*k)
+		for i := 0; i < m; i++ {
+			for j := 0; j < k; j++ {
+				vcol := ldv - i - 1
+				vrow := k - j - 1
+				if j > i {
+					v[vrow*ldv+vcol] = 0
+				} else if j == i {
+					v[vrow*ldv+vcol] = 1
+				} else {
+					v[vrow*ldv+vcol] = vMat.Data[i*vMat.Stride+j]
+				}
+			}
+		}
+		return blas64.General{
+			Rows:   rowsv,
+			Cols:   ldv,
+			Stride: ldv,
+			Data:   v,
+		}
+	}
+}
+
+func constructH(tau []float64, v blas64.General, store lapack.StoreV, direct lapack.Direct) blas64.General {
+	m := v.Rows
+	k := v.Cols
+	if store == lapack.RowWise {
+		m, k = k, m
+	}
+	h := blas64.General{
+		Rows:   m,
+		Cols:   m,
+		Stride: m,
+		Data:   make([]float64, m*m),
+	}
+	for i := 0; i < m; i++ {
+		h.Data[i*m+i] = 1
+	}
+	for i := 0; i < k; i++ {
+		vecData := make([]float64, m)
+		if store == lapack.ColumnWise {
+			for j := 0; j < m; j++ {
+				vecData[j] = v.Data[j*v.Cols+i]
+			}
+		} else {
+			for j := 0; j < m; j++ {
+				vecData[j] = v.Data[i*v.Cols+j]
+			}
+		}
+		vec := blas64.Vector{
+			Inc:  1,
+			Data: vecData,
+		}
+
+		hi := blas64.General{
+			Rows:   m,
+			Cols:   m,
+			Stride: m,
+			Data:   make([]float64, m*m),
+		}
+		for i := 0; i < m; i++ {
+			hi.Data[i*m+i] = 1
+		}
+		// hi = I - tau * v * v^T
+		blas64.Ger(-tau[i], vec, vec, hi)
+
+		hcopy := blas64.General{
+			Rows:   m,
+			Cols:   m,
+			Stride: m,
+			Data:   make([]float64, m*m),
+		}
+		copy(hcopy.Data, h.Data)
+		if direct == lapack.Forward {
+			// H = H * H_I in forward mode
+			blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, hcopy, hi, 0, h)
+		} else {
+			// H = H_I * H in backward mode
+			blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, hi, hcopy, 0, h)
+		}
+	}
+	return h
+}
+
+// constructQ constructs the Q matrix from the result of dgeqrf and dgeqr2
+func constructQ(kind string, m, n int, a []float64, lda int, tau []float64) blas64.General {
+	k := min(m, n)
+	var sz int
+	switch kind {
+	case "QR":
+		sz = m
+	case "LQ":
+		sz = n
+	}
+
+	q := blas64.General{
+		Rows:   sz,
+		Cols:   sz,
+		Stride: sz,
+		Data:   make([]float64, sz*sz),
+	}
+	for i := 0; i < sz; i++ {
+		q.Data[i*sz+i] = 1
+	}
+	qCopy := blas64.General{
+		Rows:   q.Rows,
+		Cols:   q.Cols,
+		Stride: q.Stride,
+		Data:   make([]float64, len(q.Data)),
+	}
+	for i := 0; i < k; i++ {
+		h := blas64.General{
+			Rows:   sz,
+			Cols:   sz,
+			Stride: sz,
+			Data:   make([]float64, sz*sz),
+		}
+		for j := 0; j < sz; j++ {
+			h.Data[j*sz+j] = 1
+		}
+		vVec := blas64.Vector{
+			Inc:  1,
+			Data: make([]float64, sz),
+		}
+		for j := 0; j < i; j++ {
+			vVec.Data[j] = 0
+		}
+		vVec.Data[i] = 1
+		switch kind {
+		case "QR":
+			for j := i + 1; j < sz; j++ {
+				vVec.Data[j] = a[lda*j+i]
+			}
+		case "LQ":
+			for j := i + 1; j < sz; j++ {
+				vVec.Data[j] = a[i*lda+j]
+			}
+		}
+		blas64.Ger(-tau[i], vVec, vVec, h)
+		copy(qCopy.Data, q.Data)
+		// Mulitply q by the new h
+		switch kind {
+		case "QR":
+			blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qCopy, h, 0, q)
+		case "LQ":
+			blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, h, qCopy, 0, q)
+		}
+	}
+	return q
+}