mirror of
				https://github.com/gonum/gonum.git
				synced 2025-10-31 10:36:30 +08:00 
			
		
		
		
	asm: add generation of DaxpyUnitary and DaxpyUnitaryTo
This commit is contained in:
		| @@ -6,6 +6,12 @@ | |||||||
|  |  | ||||||
| package asm | package asm | ||||||
|  |  | ||||||
|  | // CaxpyUnitary adds alpha * x[i] to y[i] for every index i of x. | ||||||
|  | // y must be at least as long as x, otherwise indexing y goes out of range. | ||||||
|  | func CaxpyUnitary(alpha complex64, x, y []complex64) { | ||||||
|  | 	for i, v := range x { | ||||||
|  | 		y[i] += alpha * v | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
| func CaxpyUnitaryTo(dst []complex64, alpha complex64, x, y []complex64) { | func CaxpyUnitaryTo(dst []complex64, alpha complex64, x, y []complex64) { | ||||||
| 	for i, v := range x { | 	for i, v := range x { | ||||||
| 		dst[i] = alpha*v + y[i] | 		dst[i] = alpha*v + y[i] | ||||||
|   | |||||||
| @@ -6,6 +6,12 @@ | |||||||
|  |  | ||||||
| package asm | package asm | ||||||
|  |  | ||||||
|  | // DaxpyUnitary adds alpha * x[i] to y[i] for every index i of x. | ||||||
|  | // y must be at least as long as x, otherwise indexing y goes out of range. | ||||||
|  | func DaxpyUnitary(alpha float64, x, y []float64) { | ||||||
|  | 	for i, v := range x { | ||||||
|  | 		y[i] += alpha * v | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
| func DaxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) { | func DaxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) { | ||||||
| 	for i, v := range x { | 	for i, v := range x { | ||||||
| 		dst[i] = alpha*v + y[i] | 		dst[i] = alpha*v + y[i] | ||||||
|   | |||||||
| @@ -6,6 +6,8 @@ | |||||||
|  |  | ||||||
| package asm | package asm | ||||||
|  |  | ||||||
|  | // DaxpyUnitary computes y[i] += alpha * x[i] for every index i of x. | ||||||
|  | // The body is provided in assembly (daxpyunitary_amd64.s), which assumes | ||||||
|  | // len(y) >= len(x) and does not check it. | ||||||
|  | func DaxpyUnitary(alpha float64, x, y []float64) | ||||||
|  |  | ||||||
| func DaxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) | func DaxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) | ||||||
|  |  | ||||||
| func DaxpyInc(alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) | func DaxpyInc(alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) | ||||||
|   | |||||||
| @@ -41,47 +41,6 @@ | |||||||
| // Don't insert stack check preamble. | // Don't insert stack check preamble. | ||||||
| #define NOSPLIT	4 | #define NOSPLIT	4 | ||||||
|  |  | ||||||
| // func DaxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) |  | ||||||
| // This function assumes len(y) >= len(x) and len(dst) >= len(x). |  | ||||||
| // TODO(vladimir-ch): Generate DaxpyUnitary and DaxpyUnitaryTo. |  | ||||||
| TEXT ·DaxpyUnitaryTo(SB), NOSPLIT, $0 |  | ||||||
| 	MOVQ   dst+0(FP), R10 |  | ||||||
| 	MOVHPD alpha+24(FP), X7 |  | ||||||
| 	MOVLPD alpha+24(FP), X7 |  | ||||||
| 	MOVQ   x+32(FP), R8 |  | ||||||
| 	MOVQ   x_len+40(FP), DI // n = len(x) |  | ||||||
| 	MOVQ   y+56(FP), R9 |  | ||||||
|  |  | ||||||
| 	MOVQ $0, SI // i = 0 |  | ||||||
| 	SUBQ $2, DI // n -= 2 |  | ||||||
| 	JL   V1     // if n < 0 goto V1 |  | ||||||
|  |  | ||||||
| U1:  // n >= 0 |  | ||||||
| 	// dst[i] = alpha * x[i] + y[i] unrolled 2x. |  | ||||||
| 	MOVUPD 0(R8)(SI*8), X0 |  | ||||||
| 	MOVUPD 0(R9)(SI*8), X1 |  | ||||||
| 	MULPD  X7, X0 |  | ||||||
| 	ADDPD  X0, X1 |  | ||||||
| 	MOVUPD X1, 0(R10)(SI*8) |  | ||||||
|  |  | ||||||
| 	ADDQ $2, SI // i += 2 |  | ||||||
| 	SUBQ $2, DI // n -= 2 |  | ||||||
| 	JGE  U1     // if n >= 0 goto U1 |  | ||||||
|  |  | ||||||
| V1: |  | ||||||
| 	ADDQ $2, DI // n += 2 |  | ||||||
| 	JLE  E1     // if n <= 0 goto E1 |  | ||||||
|  |  | ||||||
| 	// dst[i] = alpha * x[i] + y[i] for last iteration if n is odd. |  | ||||||
| 	MOVSD 0(R8)(SI*8), X0 |  | ||||||
| 	MOVSD 0(R9)(SI*8), X1 |  | ||||||
| 	MULSD X7, X0 |  | ||||||
| 	ADDSD X0, X1 |  | ||||||
| 	MOVSD X1, 0(R10)(SI*8) |  | ||||||
|  |  | ||||||
| E1: |  | ||||||
| 	RET |  | ||||||
|  |  | ||||||
| // func DaxpyInc(alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) | // func DaxpyInc(alpha float64, x, y []float64, n, incX, incY, ix, iy uintptr) | ||||||
| TEXT ·DaxpyInc(SB), NOSPLIT, $0 | TEXT ·DaxpyInc(SB), NOSPLIT, $0 | ||||||
| 	MOVHPD alpha+0(FP), X7 | 	MOVHPD alpha+0(FP), X7 | ||||||
|   | |||||||
							
								
								
									
										83
									
								
								asm/daxpyunitary_amd64.s
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								asm/daxpyunitary_amd64.s
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | |||||||
|  | // Generated by running | ||||||
|  | //  go generate github.com/gonum/internal/asm | ||||||
|  | // DO NOT EDIT. | ||||||
|  |  | ||||||
|  | // Copyright ©2015 The gonum Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a BSD-style | ||||||
|  | // license that can be found in the LICENSE file. | ||||||
|  | // | ||||||
|  | // Some of the loop unrolling code is copied from: | ||||||
|  | // http://golang.org/src/math/big/arith_amd64.s | ||||||
|  | // which is distributed under these terms: | ||||||
|  | // | ||||||
|  | // Copyright (c) 2012 The Go Authors. All rights reserved. | ||||||
|  | // | ||||||
|  | // Redistribution and use in source and binary forms, with or without | ||||||
|  | // modification, are permitted provided that the following conditions are | ||||||
|  | // met: | ||||||
|  | // | ||||||
|  | //    * Redistributions of source code must retain the above copyright | ||||||
|  | // notice, this list of conditions and the following disclaimer. | ||||||
|  | //    * Redistributions in binary form must reproduce the above | ||||||
|  | // copyright notice, this list of conditions and the following disclaimer | ||||||
|  | // in the documentation and/or other materials provided with the | ||||||
|  | // distribution. | ||||||
|  | //    * Neither the name of Google Inc. nor the names of its | ||||||
|  | // contributors may be used to endorse or promote products derived from | ||||||
|  | // this software without specific prior written permission. | ||||||
|  | // | ||||||
|  | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  |  | ||||||
|  | //+build !noasm,!appengine | ||||||
|  |  | ||||||
|  | // TODO(fhs): use textflag.h after we drop Go 1.3 support | ||||||
|  | // #include "textflag.h" | ||||||
|  | // Don't insert stack check preamble. | ||||||
|  | #define NOSPLIT	4 | ||||||
|  |  | ||||||
|  | // func DaxpyUnitary(alpha float64, x, y []float64) | ||||||
|  | // This function assumes len(y) >= len(x). | ||||||
|  | // It computes y[i] += alpha * x[i] for i in [0, len(x)), two elements per | ||||||
|  | // loop iteration plus one scalar step when len(x) is odd. | ||||||
|  | TEXT ·DaxpyUnitary(SB), NOSPLIT, $0 | ||||||
|  | 	// X7 = {alpha, alpha}: alpha is loaded into both halves of X7 so that | ||||||
|  | 	// the packed multiply below scales two elements at once. | ||||||
|  | 	// R8 = &x[0], R9 = &y[0]. | ||||||
|  | 	MOVHPD alpha+0(FP), X7 | ||||||
|  | 	MOVLPD alpha+0(FP), X7 | ||||||
|  | 	MOVQ   x+8(FP), R8 | ||||||
|  | 	MOVQ   x_len+16(FP), DI // n = len(x) | ||||||
|  | 	MOVQ   y+32(FP), R9 | ||||||
|  |  | ||||||
|  | 	MOVQ $0, SI // i = 0 | ||||||
|  | 	SUBQ $2, DI // n -= 2 | ||||||
|  | 	JL   tail   // if n < 0 goto tail | ||||||
|  |  | ||||||
|  | loop: | ||||||
|  | 	// y[i:i+2] += alpha * x[i:i+2], using unaligned packed loads and stores. | ||||||
|  | 	MOVUPD 0(R8)(SI*8), X0 | ||||||
|  | 	MOVUPD 0(R9)(SI*8), X1 | ||||||
|  | 	MULPD  X7, X0 | ||||||
|  | 	ADDPD  X0, X1 | ||||||
|  | 	MOVUPD X1, 0(R9)(SI*8) | ||||||
|  |  | ||||||
|  | 	ADDQ $2, SI // i += 2 | ||||||
|  | 	SUBQ $2, DI // n -= 2 | ||||||
|  | 	JGE  loop   // if n >= 0 goto loop | ||||||
|  |  | ||||||
|  | tail: | ||||||
|  | 	// Undo the look-ahead subtraction; n is now the number of elements left. | ||||||
|  | 	ADDQ $2, DI // n += 2 | ||||||
|  | 	JLE  end    // if n <= 0 goto end | ||||||
|  |  | ||||||
|  | 	// y[i] += alpha * x[i] for the one remaining element when len(x) is odd. | ||||||
|  | 	MOVSD 0(R8)(SI*8), X0 | ||||||
|  | 	MOVSD 0(R9)(SI*8), X1 | ||||||
|  | 	MULSD X7, X0 | ||||||
|  | 	ADDSD X0, X1 | ||||||
|  | 	MOVSD X1, 0(R9)(SI*8) | ||||||
|  |  | ||||||
|  | end: | ||||||
|  | 	RET | ||||||
							
								
								
									
										84
									
								
								asm/daxpyunitaryto_amd64.s
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								asm/daxpyunitaryto_amd64.s
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | |||||||
|  | // Generated by running | ||||||
|  | //  go generate github.com/gonum/internal/asm | ||||||
|  | // DO NOT EDIT. | ||||||
|  |  | ||||||
|  | // Copyright ©2015 The gonum Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a BSD-style | ||||||
|  | // license that can be found in the LICENSE file. | ||||||
|  | // | ||||||
|  | // Some of the loop unrolling code is copied from: | ||||||
|  | // http://golang.org/src/math/big/arith_amd64.s | ||||||
|  | // which is distributed under these terms: | ||||||
|  | // | ||||||
|  | // Copyright (c) 2012 The Go Authors. All rights reserved. | ||||||
|  | // | ||||||
|  | // Redistribution and use in source and binary forms, with or without | ||||||
|  | // modification, are permitted provided that the following conditions are | ||||||
|  | // met: | ||||||
|  | // | ||||||
|  | //    * Redistributions of source code must retain the above copyright | ||||||
|  | // notice, this list of conditions and the following disclaimer. | ||||||
|  | //    * Redistributions in binary form must reproduce the above | ||||||
|  | // copyright notice, this list of conditions and the following disclaimer | ||||||
|  | // in the documentation and/or other materials provided with the | ||||||
|  | // distribution. | ||||||
|  | //    * Neither the name of Google Inc. nor the names of its | ||||||
|  | // contributors may be used to endorse or promote products derived from | ||||||
|  | // this software without specific prior written permission. | ||||||
|  | // | ||||||
|  | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  |  | ||||||
|  | //+build !noasm,!appengine | ||||||
|  |  | ||||||
|  | // TODO(fhs): use textflag.h after we drop Go 1.3 support | ||||||
|  | // #include "textflag.h" | ||||||
|  | // Don't insert stack check preamble. | ||||||
|  | #define NOSPLIT	4 | ||||||
|  |  | ||||||
|  | // func DaxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) | ||||||
|  | // This function assumes len(y) >= len(x) and len(dst) >= len(x). | ||||||
|  | // It computes dst[i] = alpha * x[i] + y[i] for i in [0, len(x)), two elements | ||||||
|  | // per loop iteration plus one scalar step when len(x) is odd. | ||||||
|  | TEXT ·DaxpyUnitaryTo(SB), NOSPLIT, $0 | ||||||
|  | 	// R10 = &dst[0], R8 = &x[0], R9 = &y[0]. | ||||||
|  | 	// X7 = {alpha, alpha}: alpha is loaded into both halves of X7 so that | ||||||
|  | 	// the packed multiply below scales two elements at once. | ||||||
|  | 	MOVQ   dst+0(FP), R10 | ||||||
|  | 	MOVHPD alpha+24(FP), X7 | ||||||
|  | 	MOVLPD alpha+24(FP), X7 | ||||||
|  | 	MOVQ   x+32(FP), R8 | ||||||
|  | 	MOVQ   x_len+40(FP), DI // n = len(x) | ||||||
|  | 	MOVQ   y+56(FP), R9 | ||||||
|  |  | ||||||
|  | 	MOVQ $0, SI // i = 0 | ||||||
|  | 	SUBQ $2, DI // n -= 2 | ||||||
|  | 	JL   tail   // if n < 0 goto tail | ||||||
|  |  | ||||||
|  | loop: | ||||||
|  | 	// dst[i:i+2] = alpha * x[i:i+2] + y[i:i+2], using unaligned packed loads and stores. | ||||||
|  | 	MOVUPD 0(R8)(SI*8), X0 | ||||||
|  | 	MOVUPD 0(R9)(SI*8), X1 | ||||||
|  | 	MULPD  X7, X0 | ||||||
|  | 	ADDPD  X0, X1 | ||||||
|  | 	MOVUPD X1, 0(R10)(SI*8) | ||||||
|  |  | ||||||
|  | 	ADDQ $2, SI // i += 2 | ||||||
|  | 	SUBQ $2, DI // n -= 2 | ||||||
|  | 	JGE  loop   // if n >= 0 goto loop | ||||||
|  |  | ||||||
|  | tail: | ||||||
|  | 	// Undo the look-ahead subtraction; n is now the number of elements left. | ||||||
|  | 	ADDQ $2, DI // n += 2 | ||||||
|  | 	JLE  end    // if n <= 0 goto end | ||||||
|  |  | ||||||
|  | 	// dst[i] = alpha * x[i] + y[i] for the one remaining element when len(x) is odd. | ||||||
|  | 	MOVSD 0(R8)(SI*8), X0 | ||||||
|  | 	MOVSD 0(R9)(SI*8), X1 | ||||||
|  | 	MULSD X7, X0 | ||||||
|  | 	ADDSD X0, X1 | ||||||
|  | 	MOVSD X1, 0(R10)(SI*8) | ||||||
|  |  | ||||||
|  | end: | ||||||
|  | 	RET | ||||||
							
								
								
									
										158
									
								
								asm/genasm.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										158
									
								
								asm/genasm.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,158 @@ | |||||||
|  | // Copyright ©2015 The gonum Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a BSD-style | ||||||
|  | // license that can be found in the LICENSE file. | ||||||
|  |  | ||||||
|  | //+build ignore | ||||||
|  |  | ||||||
|  | // genasm generates the amd64 assembler code for the DaxpyUnitary and DaxpyUnitaryTo functions. | ||||||
|  | package main | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"log" | ||||||
|  | 	"os" | ||||||
|  | 	"strings" | ||||||
|  | 	"text/template" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // asm holds the named sub-templates that the per-function templates in | ||||||
|  | // funcs are composed from: "header" (generated-file notice, license, build | ||||||
|  | // tag and NOSPLIT define), "unitary_preamble" and "unitaryto_preamble" | ||||||
|  | // (argument loading for each function), and "unitary_body" (the shared | ||||||
|  | // loop, which stores to R10 when .To is true and to R9 otherwise). | ||||||
|  | var asm = template.Must(template.New("asm").Parse(`{{define "header"}}// Generated by running | ||||||
|  | //  go generate github.com/gonum/internal/asm | ||||||
|  | // DO NOT EDIT. | ||||||
|  |  | ||||||
|  | // Copyright ©2015 The gonum Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a BSD-style | ||||||
|  | // license that can be found in the LICENSE file. | ||||||
|  | // | ||||||
|  | // Some of the loop unrolling code is copied from: | ||||||
|  | // http://golang.org/src/math/big/arith_amd64.s | ||||||
|  | // which is distributed under these terms: | ||||||
|  | // | ||||||
|  | // Copyright (c) 2012 The Go Authors. All rights reserved. | ||||||
|  | // | ||||||
|  | // Redistribution and use in source and binary forms, with or without | ||||||
|  | // modification, are permitted provided that the following conditions are | ||||||
|  | // met: | ||||||
|  | // | ||||||
|  | //    * Redistributions of source code must retain the above copyright | ||||||
|  | // notice, this list of conditions and the following disclaimer. | ||||||
|  | //    * Redistributions in binary form must reproduce the above | ||||||
|  | // copyright notice, this list of conditions and the following disclaimer | ||||||
|  | // in the documentation and/or other materials provided with the | ||||||
|  | // distribution. | ||||||
|  | //    * Neither the name of Google Inc. nor the names of its | ||||||
|  | // contributors may be used to endorse or promote products derived from | ||||||
|  | // this software without specific prior written permission. | ||||||
|  | // | ||||||
|  | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  |  | ||||||
|  | //+build !noasm,!appengine | ||||||
|  |  | ||||||
|  | // TODO(fhs): use textflag.h after we drop Go 1.3 support | ||||||
|  | // #include "textflag.h" | ||||||
|  | // Don't insert stack check preamble. | ||||||
|  | #define NOSPLIT	4 | ||||||
|  | {{end}} | ||||||
|  |  | ||||||
|  | {{define "unitary_preamble"}} | ||||||
|  | // func DaxpyUnitary(alpha float64, x, y []float64) | ||||||
|  | // This function assumes len(y) >= len(x). | ||||||
|  | TEXT ·DaxpyUnitary(SB), NOSPLIT, $0 | ||||||
|  | 	MOVHPD alpha+0(FP), X7 | ||||||
|  | 	MOVLPD alpha+0(FP), X7 | ||||||
|  | 	MOVQ   x+8(FP), R8 | ||||||
|  | 	MOVQ   x_len+16(FP), DI // n = len(x) | ||||||
|  | 	MOVQ   y+32(FP), R9 | ||||||
|  | {{end}} | ||||||
|  |  | ||||||
|  | {{define "unitaryto_preamble"}} | ||||||
|  | // func DaxpyUnitaryTo(dst []float64, alpha float64, x, y []float64) | ||||||
|  | // This function assumes len(y) >= len(x) and len(dst) >= len(x). | ||||||
|  | TEXT ·DaxpyUnitaryTo(SB), NOSPLIT, $0 | ||||||
|  | 	MOVQ   dst+0(FP), R10 | ||||||
|  | 	MOVHPD alpha+24(FP), X7 | ||||||
|  | 	MOVLPD alpha+24(FP), X7 | ||||||
|  | 	MOVQ   x+32(FP), R8 | ||||||
|  | 	MOVQ   x_len+40(FP), DI // n = len(x) | ||||||
|  | 	MOVQ   y+56(FP), R9 | ||||||
|  | {{end}} | ||||||
|  |  | ||||||
|  | {{define "unitary_body"}} | ||||||
|  | 	MOVQ $0, SI // i = 0 | ||||||
|  | 	SUBQ $2, DI // n -= 2 | ||||||
|  | 	JL   tail   // if n < 0 goto tail | ||||||
|  |  | ||||||
|  | loop: | ||||||
|  | 	MOVUPD 0(R8)(SI*8), X0 | ||||||
|  | 	MOVUPD 0(R9)(SI*8), X1 | ||||||
|  | 	MULPD  X7, X0 | ||||||
|  | 	ADDPD  X0, X1 | ||||||
|  | 	MOVUPD X1, 0({{if .To}}R10{{else}}R9{{end}})(SI*8) | ||||||
|  |  | ||||||
|  | 	ADDQ $2, SI // i += 2 | ||||||
|  | 	SUBQ $2, DI // n -= 2 | ||||||
|  | 	JGE  loop   // if n >= 0 goto loop | ||||||
|  |  | ||||||
|  | tail: | ||||||
|  | 	ADDQ $2, DI // n += 2 | ||||||
|  | 	JLE  end    // if n <= 0 goto end | ||||||
|  |  | ||||||
|  | 	MOVSD 0(R8)(SI*8), X0 | ||||||
|  | 	MOVSD 0(R9)(SI*8), X1 | ||||||
|  | 	MULSD X7, X0 | ||||||
|  | 	ADDSD X0, X1 | ||||||
|  | 	MOVSD X1, 0({{if .To}}R10{{else}}R9{{end}})(SI*8) | ||||||
|  |  | ||||||
|  | end: | ||||||
|  | 	RET | ||||||
|  | {{end}}`)) | ||||||
|  |  | ||||||
|  | // Function describes one assembly function to generate. | ||||||
|  | type Function struct { | ||||||
|  | 	// Name is the function name; main lower-cases it and appends | ||||||
|  | 	// "_amd64.s" to form the output file name. | ||||||
|  | 	Name string | ||||||
|  | 	// To selects the store target in the "unitary_body" template: | ||||||
|  | 	// R10 when true, R9 when false. | ||||||
|  | 	To   bool | ||||||
|  |  | ||||||
|  | 	// template is the top-level template text for this function; it is | ||||||
|  | 	// parsed on a clone of asm so it can invoke the named sub-templates. | ||||||
|  | 	template string | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // funcs lists the functions for which main writes an assembly file. Each | ||||||
|  | // entry combines the shared header and body with its own preamble. | ||||||
|  | var funcs = []Function{ | ||||||
|  | 	{ | ||||||
|  | 		Name:     "DaxpyUnitary", | ||||||
|  | 		To:       false, | ||||||
|  | 		template: `{{template "header" .}}{{template "unitary_preamble" .}}{{template "unitary_body" .}}`, | ||||||
|  | 	}, | ||||||
|  | 	{ | ||||||
|  | 		Name:     "DaxpyUnitaryTo", | ||||||
|  | 		To:       true, | ||||||
|  | 		template: `{{template "header" .}}{{template "unitaryto_preamble" .}}{{template "unitary_body" .}}`, | ||||||
|  | 	}, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // main writes one assembly file per entry in funcs. For each entry it | ||||||
|  | // parses the entry's template on a clone of asm, creates | ||||||
|  | // <lower-cased Name>_amd64.s relative to the working directory, and | ||||||
|  | // executes the template into it. Any failure aborts the program. | ||||||
|  | func main() { | ||||||
|  | 	for _, fn := range funcs { | ||||||
|  | 		// Clone so each function's top-level template is parsed in isolation | ||||||
|  | 		// while still seeing the shared named sub-templates. | ||||||
|  | 		t, err := template.Must(asm.Clone()).Parse(fn.template) | ||||||
|  | 		if err != nil { | ||||||
|  | 			log.Fatalf("failed to parse template: %v", err) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		file := strings.ToLower(fn.Name) + "_amd64.s" | ||||||
|  | 		fmt.Println("Generating", file) | ||||||
|  | 		f, err := os.Create(file) | ||||||
|  | 		if err != nil { | ||||||
|  | 			log.Fatalf("failed to create %q: %v", file, err) | ||||||
|  | 		} | ||||||
|  | 		err = t.Execute(f, fn) | ||||||
|  | 		if err != nil { | ||||||
|  | 			log.Fatalf("failed to execute template: %v", err) | ||||||
|  | 		} | ||||||
|  | 		// A failed Close can mean the generated source was not fully | ||||||
|  | 		// written, so report it instead of silently leaving a bad file. | ||||||
|  | 		if err := f.Close(); err != nil { | ||||||
|  | 			log.Fatalf("failed to close %q: %v", file, err) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
| @@ -4,5 +4,6 @@ | |||||||
|  |  | ||||||
| //go:generate ./single_precision | //go:generate ./single_precision | ||||||
| //go:generate ./complex | //go:generate ./complex | ||||||
|  | //go:generate go run genasm.go | ||||||
|  |  | ||||||
| package asm | package asm | ||||||
|   | |||||||
| @@ -6,6 +6,12 @@ | |||||||
|  |  | ||||||
| package asm | package asm | ||||||
|  |  | ||||||
|  | // SaxpyUnitary adds alpha * x[i] to y[i] for every index i of x. | ||||||
|  | // y must be at least as long as x, otherwise indexing y goes out of range. | ||||||
|  | func SaxpyUnitary(alpha float32, x, y []float32) { | ||||||
|  | 	for i, v := range x { | ||||||
|  | 		y[i] += alpha * v | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
| func SaxpyUnitaryTo(dst []float32, alpha float32, x, y []float32) { | func SaxpyUnitaryTo(dst []float32, alpha float32, x, y []float32) { | ||||||
| 	for i, v := range x { | 	for i, v := range x { | ||||||
| 		dst[i] = alpha*v + y[i] | 		dst[i] = alpha*v + y[i] | ||||||
|   | |||||||
| @@ -6,6 +6,12 @@ | |||||||
|  |  | ||||||
| package asm | package asm | ||||||
|  |  | ||||||
|  | // ZaxpyUnitary adds alpha * x[i] to y[i] for every index i of x. | ||||||
|  | // y must be at least as long as x, otherwise indexing y goes out of range. | ||||||
|  | func ZaxpyUnitary(alpha complex128, x, y []complex128) { | ||||||
|  | 	for i, v := range x { | ||||||
|  | 		y[i] += alpha * v | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
| func ZaxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) { | func ZaxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) { | ||||||
| 	for i, v := range x { | 	for i, v := range x { | ||||||
| 		dst[i] = alpha*v + y[i] | 		dst[i] = alpha*v + y[i] | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Vladimir Chalupecky
					Vladimir Chalupecky