From 1327fd6e2114304a1acc8415efe069520fa6ca86 Mon Sep 17 00:00:00 2001 From: Ingo Oppermann Date: Tue, 20 Aug 2024 14:14:47 +0200 Subject: [PATCH] Add memfs storage based on dolthub's swiss maps --- go.mod | 2 + go.sum | 4 + io/fs/fs_test.go | 10 +- io/fs/mem.go | 2 + io/fs/mem_storage.go | 89 +++++ io/fs/mem_test.go | 156 ++++---- vendor/github.com/dolthub/maphash/.gitignore | 2 + vendor/github.com/dolthub/maphash/LICENSE | 201 ++++++++++ vendor/github.com/dolthub/maphash/README.md | 4 + vendor/github.com/dolthub/maphash/hasher.go | 48 +++ vendor/github.com/dolthub/maphash/runtime.go | 111 ++++++ vendor/github.com/dolthub/swiss/.gitignore | 5 + vendor/github.com/dolthub/swiss/LICENSE | 201 ++++++++++ vendor/github.com/dolthub/swiss/README.md | 54 +++ vendor/github.com/dolthub/swiss/bits.go | 58 +++ vendor/github.com/dolthub/swiss/bits_amd64.go | 50 +++ vendor/github.com/dolthub/swiss/map.go | 359 ++++++++++++++++++ vendor/github.com/dolthub/swiss/simd/match.s | 19 + .../dolthub/swiss/simd/match_amd64.go | 9 + vendor/modules.txt | 7 + 20 files changed, 1317 insertions(+), 74 deletions(-) create mode 100644 vendor/github.com/dolthub/maphash/.gitignore create mode 100644 vendor/github.com/dolthub/maphash/LICENSE create mode 100644 vendor/github.com/dolthub/maphash/README.md create mode 100644 vendor/github.com/dolthub/maphash/hasher.go create mode 100644 vendor/github.com/dolthub/maphash/runtime.go create mode 100644 vendor/github.com/dolthub/swiss/.gitignore create mode 100644 vendor/github.com/dolthub/swiss/LICENSE create mode 100644 vendor/github.com/dolthub/swiss/README.md create mode 100644 vendor/github.com/dolthub/swiss/bits.go create mode 100644 vendor/github.com/dolthub/swiss/bits_amd64.go create mode 100644 vendor/github.com/dolthub/swiss/map.go create mode 100644 vendor/github.com/dolthub/swiss/simd/match.s create mode 100644 vendor/github.com/dolthub/swiss/simd/match_amd64.go diff --git a/go.mod b/go.mod index 50e55ef3..c9b6f090 100644 --- a/go.mod +++ 
b/go.mod @@ -13,6 +13,7 @@ require ( github.com/caddyserver/certmagic v0.21.3 github.com/datarhei/gosrt v0.7.0 github.com/datarhei/joy4 v0.0.0-20240603190808-b1407345907e + github.com/dolthub/swiss v0.2.1 github.com/fujiwara/shapeio v1.0.0 github.com/go-playground/validator/v10 v10.22.0 github.com/gobwas/glob v0.2.3 @@ -61,6 +62,7 @@ require ( github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/dolthub/maphash v0.1.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/fatih/color v1.17.0 // indirect github.com/gabriel-vasile/mimetype v1.4.5 // indirect diff --git a/go.sum b/go.sum index 4213378d..381e7d01 100644 --- a/go.sum +++ b/go.sum @@ -55,6 +55,10 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g= github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= +github.com/dolthub/maphash v0.1.0 h1:bsQ7JsF4FkkWyrP3oCnFJgrCUAFbFf3kOl4L/QxPDyQ= +github.com/dolthub/maphash v0.1.0/go.mod h1:gkg4Ch4CdCDu5h6PMriVLawB7koZ+5ijb9puGMV50a4= +github.com/dolthub/swiss v0.2.1 h1:gs2osYs5SJkAaH5/ggVJqXQxRXtWshF6uE0lgR/Y3Gw= +github.com/dolthub/swiss v0.2.1/go.mod h1:8AhKZZ1HK7g18j7v7k6c5cYIGEZJcPn0ARsai8cUrh0= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= diff --git a/io/fs/fs_test.go b/io/fs/fs_test.go index 9d52dddb..7e7c2362 100644 --- a/io/fs/fs_test.go +++ b/io/fs/fs_test.go @@ -59,8 +59,14 @@ func TestFilesystem(t *testing.T) { 
os.RemoveAll("./testing/") filesystems := map[string]func(string) (Filesystem, error){ - "memfs": func(name string) (Filesystem, error) { - return NewMemFilesystem(MemConfig{}) + "memfs-map": func(name string) (Filesystem, error) { + return NewMemFilesystem(MemConfig{Storage: "map"}) + }, + "memfs-xsync": func(name string) (Filesystem, error) { + return NewMemFilesystem(MemConfig{Storage: "xsync"}) + }, + "memfs-swiss": func(name string) (Filesystem, error) { + return NewMemFilesystem(MemConfig{Storage: "swiss"}) }, "diskfs": func(name string) (Filesystem, error) { return NewRootedDiskFilesystem(RootedDiskConfig{ diff --git a/io/fs/mem.go b/io/fs/mem.go index 2033d349..dee83c78 100644 --- a/io/fs/mem.go +++ b/io/fs/mem.go @@ -208,6 +208,8 @@ func NewMemFilesystem(config MemConfig) (Filesystem, error) { if config.Storage == "map" { fs.storage = newMapStorage() + } else if config.Storage == "swiss" { + fs.storage = newSwissMapStorage() } else { fs.storage = newMapOfStorage() } diff --git a/io/fs/mem_storage.go b/io/fs/mem_storage.go index e6bc2362..5df8f946 100644 --- a/io/fs/mem_storage.go +++ b/io/fs/mem_storage.go @@ -4,6 +4,7 @@ import ( "bytes" "sync" + "github.com/dolthub/swiss" "github.com/puzpuzpuz/xsync/v3" ) @@ -182,3 +183,91 @@ func (m *mapStorage) Range(f func(key string, value *memFile) bool) { } } } + +type swissMapStorage struct { + lock *xsync.RBMutex + files *swiss.Map[string, *memFile] +} + +func newSwissMapStorage() memStorage { + m := &swissMapStorage{ + lock: xsync.NewRBMutex(), + files: swiss.NewMap[string, *memFile](128), + } + + return m +} + +func (m *swissMapStorage) Delete(key string) (*memFile, bool) { + m.lock.Lock() + defer m.lock.Unlock() + + file, hasFile := m.files.Get(key) + if !hasFile { + return nil, false + } + + m.files.Delete(key) + + return file, true +} + +func (m *swissMapStorage) Store(key string, value *memFile) (*memFile, bool) { + m.lock.Lock() + defer m.lock.Unlock() + + file, hasFile := m.files.Get(key) + 
m.files.Put(key, value) + + return file, hasFile +} + +func (m *swissMapStorage) Load(key string) (*memFile, bool) { + token := m.lock.RLock() + defer m.lock.RUnlock(token) + + return m.files.Get(key) +} + +func (m *swissMapStorage) LoadAndCopy(key string) (*memFile, bool) { + token := m.lock.RLock() + defer m.lock.RUnlock(token) + + v, ok := m.files.Get(key) + if !ok { + return nil, false + } + + f := &memFile{ + memFileInfo: memFileInfo{ + name: v.name, + size: v.size, + dir: v.dir, + lastMod: v.lastMod, + linkTo: v.linkTo, + }, + r: nil, + } + + if v.data != nil { + f.data = bytes.NewBuffer(v.data.Bytes()) + } + + return f, true +} + +func (m *swissMapStorage) Has(key string) bool { + token := m.lock.RLock() + defer m.lock.RUnlock(token) + + return m.files.Has(key) +} + +func (m *swissMapStorage) Range(f func(key string, value *memFile) bool) { + token := m.lock.RLock() + defer m.lock.RUnlock(token) + + m.files.Iter(func(key string, value *memFile) bool { + return !f(key, value) + }) +} diff --git a/io/fs/mem_test.go b/io/fs/mem_test.go index 0f8c1758..1e9f43fb 100644 --- a/io/fs/mem_test.go +++ b/io/fs/mem_test.go @@ -30,66 +30,6 @@ func TestMemFromDir(t *testing.T) { }, names) } -func BenchmarkMemList(b *testing.B) { - mem, err := NewMemFilesystem(MemConfig{}) - require.NoError(b, err) - - for i := 0; i < 1000; i++ { - id := rand.StringAlphanumeric(8) - path := fmt.Sprintf("/%d/%s.dat", i, id) - mem.WriteFile(path, []byte("foobar")) - } - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - mem.List("/", ListOptions{ - Pattern: "/5/**", - }) - } -} - -func BenchmarkMemRemoveList(b *testing.B) { - mem, err := NewMemFilesystem(MemConfig{}) - require.NoError(b, err) - - for i := 0; i < 1000; i++ { - id := rand.StringAlphanumeric(8) - path := fmt.Sprintf("/%d/%s.dat", i, id) - mem.WriteFile(path, []byte("foobar")) - } - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - mem.RemoveList("/", ListOptions{ - Pattern: "/5/**", - }) - } -} - -func BenchmarkMemReadFile(b 
*testing.B) { - mem, err := NewMemFilesystem(MemConfig{}) - require.NoError(b, err) - - nFiles := 1000 - - for i := 0; i < nFiles; i++ { - path := fmt.Sprintf("/%d.dat", i) - mem.WriteFile(path, []byte(rand.StringAlphanumeric(2*1024))) - } - - r := gorand.New(gorand.NewSource(42)) - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - num := r.Intn(nFiles) - f := mem.Open("/" + strconv.Itoa(num) + ".dat") - f.Close() - } -} - func TestWriteWhileRead(t *testing.T) { fs, err := NewMemFilesystem(MemConfig{}) require.NoError(t, err) @@ -108,29 +48,101 @@ func TestWriteWhileRead(t *testing.T) { require.Equal(t, []byte("xxxxx"), data) } -func BenchmarkMemWriteFile(b *testing.B) { - mem, err := NewMemFilesystem(MemConfig{}) - require.NoError(b, err) +func BenchmarkMemStorages(b *testing.B) { + storages := []string{ + "map", + "xsync", + "swiss", + } + benchmarks := map[string]func(*testing.B, Filesystem){ + "list": benchmarkMemList, + "removeList": benchmarkMemRemoveList, + "readFile": benchmarkMemReadFile, + "writeFile": benchmarkMemWriteFile, + "readWhileWrite": benchmarkMemReadFileWhileWriting, + } + + for name, fn := range benchmarks { + for _, storage := range storages { + mem, err := NewMemFilesystem(MemConfig{Storage: storage}) + require.NoError(b, err) + + b.Run(name+"-"+storage, func(b *testing.B) { + fn(b, mem) + }) + } + } +} + +func benchmarkMemList(b *testing.B, fs Filesystem) { + for i := 0; i < 1000; i++ { + id := rand.StringAlphanumeric(8) + path := fmt.Sprintf("/%d/%s.dat", i, id) + fs.WriteFile(path, []byte("foobar")) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + fs.List("/", ListOptions{ + Pattern: "/5/**", + }) + } +} + +func benchmarkMemRemoveList(b *testing.B, fs Filesystem) { + for i := 0; i < 1000; i++ { + id := rand.StringAlphanumeric(8) + path := fmt.Sprintf("/%d/%s.dat", i, id) + fs.WriteFile(path, []byte("foobar")) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + fs.RemoveList("/", ListOptions{ + Pattern: "/5/**", + }) + } +} + 
+func benchmarkMemReadFile(b *testing.B, fs Filesystem) { + nFiles := 1000 + + for i := 0; i < nFiles; i++ { + path := fmt.Sprintf("/%d.dat", i) + fs.WriteFile(path, []byte(rand.StringAlphanumeric(2*1024))) + } + + r := gorand.New(gorand.NewSource(42)) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + num := r.Intn(nFiles) + f := fs.Open("/" + strconv.Itoa(num) + ".dat") + f.Close() + } +} + +func benchmarkMemWriteFile(b *testing.B, fs Filesystem) { nFiles := 50000 for i := 0; i < nFiles; i++ { path := fmt.Sprintf("/%d.dat", i) - mem.WriteFile(path, []byte(rand.StringAlphanumeric(1))) + fs.WriteFile(path, []byte(rand.StringAlphanumeric(1))) } b.ResetTimer() for i := 0; i < b.N; i++ { path := fmt.Sprintf("/%d.dat", i%nFiles) - mem.WriteFile(path, []byte(rand.StringAlphanumeric(1))) + fs.WriteFile(path, []byte(rand.StringAlphanumeric(1))) } } -func BenchmarkMemReadFileWhileWriting(b *testing.B) { - mem, err := NewMemFilesystem(MemConfig{}) - require.NoError(b, err) - +func benchmarkMemReadFileWhileWriting(b *testing.B, fs Filesystem) { nReaders := 500 nWriters := 1000 nFiles := 30 @@ -148,7 +160,7 @@ func BenchmarkMemReadFileWhileWriting(b *testing.B) { go func(ctx context.Context, from int) { for i := 0; i < nFiles; i++ { path := fmt.Sprintf("/%d.dat", from+i) - mem.WriteFile(path, data) + fs.WriteFile(path, data) } ticker := time.NewTicker(40 * time.Millisecond) @@ -163,7 +175,7 @@ func BenchmarkMemReadFileWhileWriting(b *testing.B) { case <-ticker.C: num := gorand.Intn(nFiles) + from path := fmt.Sprintf("/%d.dat", num) - mem.WriteFile(path, data) + fs.WriteFile(path, data) } } }(ctx, i*nFiles) @@ -183,7 +195,7 @@ func BenchmarkMemReadFileWhileWriting(b *testing.B) { for i := 0; i < b.N; i++ { num := gorand.Intn(nWriters * nFiles) - f := mem.Open("/" + strconv.Itoa(num) + ".dat") + f := fs.Open("/" + strconv.Itoa(num) + ".dat") f.Close() } }() diff --git a/vendor/github.com/dolthub/maphash/.gitignore b/vendor/github.com/dolthub/maphash/.gitignore new file mode 
100644 index 00000000..977a7cad --- /dev/null +++ b/vendor/github.com/dolthub/maphash/.gitignore @@ -0,0 +1,2 @@ +*.idea +*.test \ No newline at end of file diff --git a/vendor/github.com/dolthub/maphash/LICENSE b/vendor/github.com/dolthub/maphash/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/vendor/github.com/dolthub/maphash/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/dolthub/maphash/README.md b/vendor/github.com/dolthub/maphash/README.md new file mode 100644 index 00000000..d91530f9 --- /dev/null +++ b/vendor/github.com/dolthub/maphash/README.md @@ -0,0 +1,4 @@ +# maphash + +Hash any `comparable` type using Golang's fast runtime hash. +Uses [AES](https://en.wikipedia.org/wiki/AES_instruction_set) instructions when available. \ No newline at end of file diff --git a/vendor/github.com/dolthub/maphash/hasher.go b/vendor/github.com/dolthub/maphash/hasher.go new file mode 100644 index 00000000..ef53596a --- /dev/null +++ b/vendor/github.com/dolthub/maphash/hasher.go @@ -0,0 +1,48 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package maphash + +import "unsafe" + +// Hasher hashes values of type K. +// Uses runtime AES-based hashing. 
+type Hasher[K comparable] struct { + hash hashfn + seed uintptr +} + +// NewHasher creates a new Hasher[K] with a random seed. +func NewHasher[K comparable]() Hasher[K] { + return Hasher[K]{ + hash: getRuntimeHasher[K](), + seed: newHashSeed(), + } +} + +// NewSeed returns a copy of |h| with a new hash seed. +func NewSeed[K comparable](h Hasher[K]) Hasher[K] { + return Hasher[K]{ + hash: h.hash, + seed: newHashSeed(), + } +} + +// Hash hashes |key|. +func (h Hasher[K]) Hash(key K) uint64 { + // promise to the compiler that pointer + // |p| does not escape the stack. + p := noescape(unsafe.Pointer(&key)) + return uint64(h.hash(p, h.seed)) +} diff --git a/vendor/github.com/dolthub/maphash/runtime.go b/vendor/github.com/dolthub/maphash/runtime.go new file mode 100644 index 00000000..29cd6a8e --- /dev/null +++ b/vendor/github.com/dolthub/maphash/runtime.go @@ -0,0 +1,111 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file incorporates work covered by the following copyright and +// permission notice: +// +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build go1.18 || go1.19 +// +build go1.18 go1.19 + +package maphash + +import ( + "math/rand" + "unsafe" +) + +type hashfn func(unsafe.Pointer, uintptr) uintptr + +func getRuntimeHasher[K comparable]() (h hashfn) { + a := any(make(map[K]struct{})) + i := (*mapiface)(unsafe.Pointer(&a)) + h = i.typ.hasher + return +} + +func newHashSeed() uintptr { + return uintptr(rand.Int()) +} + +// noescape hides a pointer from escape analysis. It is the identity function +// but escape analysis doesn't think the output depends on the input. +// noescape is inlined and currently compiles down to zero instructions. +// USE CAREFULLY! +// This was copied from the runtime (via pkg "strings"); see issues 23382 and 7921. +// +//go:nosplit +//go:nocheckptr +func noescape(p unsafe.Pointer) unsafe.Pointer { + x := uintptr(p) + return unsafe.Pointer(x ^ 0) +} + +type mapiface struct { + typ *maptype + val *hmap +} + +// go/src/runtime/type.go +type maptype struct { + typ _type + key *_type + elem *_type + bucket *_type + // function for hashing keys (ptr to key, seed) -> hash + hasher func(unsafe.Pointer, uintptr) uintptr + keysize uint8 + elemsize uint8 + bucketsize uint16 + flags uint32 +} + +// go/src/runtime/map.go +type hmap struct { + count int + flags uint8 + B uint8 + noverflow uint16 + // hash seed + hash0 uint32 + buckets unsafe.Pointer + oldbuckets unsafe.Pointer + nevacuate uintptr + // true type is *mapextra + // but we don't need this data + extra unsafe.Pointer +} + +// go/src/runtime/type.go +type tflag uint8 +type nameOff int32 +type typeOff int32 + +// go/src/runtime/type.go +type _type struct { + size uintptr + ptrdata uintptr + hash uint32 + tflag tflag + align uint8 + fieldAlign uint8 + kind uint8 + equal func(unsafe.Pointer, unsafe.Pointer) bool + gcdata *byte + str nameOff + ptrToThis typeOff +} diff --git a/vendor/github.com/dolthub/swiss/.gitignore b/vendor/github.com/dolthub/swiss/.gitignore new file mode 100644 index 00000000..1f9adf93 --- /dev/null +++ 
b/vendor/github.com/dolthub/swiss/.gitignore @@ -0,0 +1,5 @@ +**/.idea/ +.vscode +.run +venv +.DS_Store \ No newline at end of file diff --git a/vendor/github.com/dolthub/swiss/LICENSE b/vendor/github.com/dolthub/swiss/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/vendor/github.com/dolthub/swiss/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/dolthub/swiss/README.md b/vendor/github.com/dolthub/swiss/README.md new file mode 100644 index 00000000..71c6f7dd --- /dev/null +++ b/vendor/github.com/dolthub/swiss/README.md @@ -0,0 +1,54 @@ +# SwissMap + +SwissMap is a hash table adapted from the "SwissTable" family of hash tables from [Abseil](https://abseil.io/blog/20180927-swisstables). It uses [AES](https://github.com/dolthub/maphash) instructions for fast-hashing and performs key lookups in parallel using [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions) instructions. Because of these optimizations, SwissMap is faster and more memory efficient than Golang's built-in `map`. If you'd like to learn more about its design and implementation, check out this [blog post](https://www.dolthub.com/blog/2023-03-28-swiss-map/) announcing its release. + + +## Example + +SwissMap exposes the same interface as the built-in `map`. Give it a try using this [Go playground](https://go.dev/play/p/JPDC5WhYN7g). 
+ +```go +package main + +import "github.com/dolthub/swiss" + +func main() { + m := swiss.NewMap[string, int](42) + + m.Put("foo", 1) + m.Put("bar", 2) + + m.Iter(func(k string, v int) (stop bool) { + println("iter", k, v) + return false // continue + }) + + if x, ok := m.Get("foo"); ok { + println(x) + } + if m.Has("bar") { + x, _ := m.Get("bar") + println(x) + } + + m.Put("foo", -1) + m.Delete("bar") + + if x, ok := m.Get("foo"); ok { + println(x) + } + if m.Has("bar") { + x, _ := m.Get("bar") + println(x) + } + + m.Clear() + + // Output: + // iter foo 1 + // iter bar 2 + // 1 + // 2 + // -1 +} +``` diff --git a/vendor/github.com/dolthub/swiss/bits.go b/vendor/github.com/dolthub/swiss/bits.go new file mode 100644 index 00000000..f435b6dc --- /dev/null +++ b/vendor/github.com/dolthub/swiss/bits.go @@ -0,0 +1,58 @@ +// Copyright 2023 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build !amd64 || nosimd + +package swiss + +import ( + "math/bits" + "unsafe" +) + +const ( + groupSize = 8 + maxAvgGroupLoad = 7 + + loBits uint64 = 0x0101010101010101 + hiBits uint64 = 0x8080808080808080 +) + +type bitset uint64 + +func metaMatchH2(m *metadata, h h2) bitset { + // https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord + return hasZeroByte(castUint64(m) ^ (loBits * uint64(h))) +} + +func metaMatchEmpty(m *metadata) bitset { + return hasZeroByte(castUint64(m) ^ hiBits) +} + +func nextMatch(b *bitset) uint32 { + s := uint32(bits.TrailingZeros64(uint64(*b))) + *b &= ^(1 << s) // clear bit |s| + return s >> 3 // div by 8 +} + +func hasZeroByte(x uint64) bitset { + return bitset(((x - loBits) & ^(x)) & hiBits) +} + +func castUint64(m *metadata) uint64 { + return *(*uint64)((unsafe.Pointer)(m)) +} + +//go:linkname fastrand runtime.fastrand +func fastrand() uint32 diff --git a/vendor/github.com/dolthub/swiss/bits_amd64.go b/vendor/github.com/dolthub/swiss/bits_amd64.go new file mode 100644 index 00000000..8b91f57c --- /dev/null +++ b/vendor/github.com/dolthub/swiss/bits_amd64.go @@ -0,0 +1,50 @@ +// Copyright 2023 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build amd64 && !nosimd + +package swiss + +import ( + "math/bits" + _ "unsafe" + + "github.com/dolthub/swiss/simd" +) + +const ( + groupSize = 16 + maxAvgGroupLoad = 14 +) + +type bitset uint16 + +func metaMatchH2(m *metadata, h h2) bitset { + b := simd.MatchMetadata((*[16]int8)(m), int8(h)) + return bitset(b) +} + +func metaMatchEmpty(m *metadata) bitset { + b := simd.MatchMetadata((*[16]int8)(m), empty) + return bitset(b) +} + +func nextMatch(b *bitset) (s uint32) { + s = uint32(bits.TrailingZeros16(uint16(*b))) + *b &= ^(1 << s) // clear bit |s| + return +} + +//go:linkname fastrand runtime.fastrand +func fastrand() uint32 diff --git a/vendor/github.com/dolthub/swiss/map.go b/vendor/github.com/dolthub/swiss/map.go new file mode 100644 index 00000000..e5ad2038 --- /dev/null +++ b/vendor/github.com/dolthub/swiss/map.go @@ -0,0 +1,359 @@ +// Copyright 2023 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package swiss + +import ( + "github.com/dolthub/maphash" +) + +const ( + maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize) +) + +// Map is an open-addressing hash map +// based on Abseil's flat_hash_map. +type Map[K comparable, V any] struct { + ctrl []metadata + groups []group[K, V] + hash maphash.Hasher[K] + resident uint32 + dead uint32 + limit uint32 +} + +// metadata is the h2 metadata array for a group. 
+// find operations first probe the control bytes +// to filter candidates before matching keys +type metadata [groupSize]int8 + +// group is a group of |groupSize| key-value pairs (16 on amd64 with SIMD, 8 otherwise) +type group[K comparable, V any] struct { + keys [groupSize]K + values [groupSize]V +} + +const ( + h1Mask uint64 = 0xffff_ffff_ffff_ff80 + h2Mask uint64 = 0x0000_0000_0000_007f + empty int8 = -128 // 0b1000_0000 + tombstone int8 = -2 // 0b1111_1110 +) + +// h1 is a 57 bit hash prefix +type h1 uint64 + +// h2 is a 7 bit hash suffix +type h2 int8 + +// NewMap constructs a Map. +func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) { + groups := numGroups(sz) + m = &Map[K, V]{ + ctrl: make([]metadata, groups), + groups: make([]group[K, V], groups), + hash: maphash.NewHasher[K](), + limit: groups * maxAvgGroupLoad, + } + for i := range m.ctrl { + m.ctrl[i] = newEmptyMetadata() + } + return +} + +// Has returns true if |key| is present in |m|. +func (m *Map[K, V]) Has(key K) (ok bool) { + hi, lo := splitHash(m.hash.Hash(key)) + g := probeStart(hi, len(m.groups)) + for { // inlined find loop + matches := metaMatchH2(&m.ctrl[g], lo) + for matches != 0 { + s := nextMatch(&matches) + if key == m.groups[g].keys[s] { + ok = true + return + } + } + // |key| is not in group |g|, + // stop probing if we see an empty slot + matches = metaMatchEmpty(&m.ctrl[g]) + if matches != 0 { + ok = false + return + } + g += 1 // linear probing + if g >= uint32(len(m.groups)) { + g = 0 + } + } +} + +// Get returns the |value| mapped by |key| if one exists. 
+func (m *Map[K, V]) Get(key K) (value V, ok bool) { + hi, lo := splitHash(m.hash.Hash(key)) + g := probeStart(hi, len(m.groups)) + for { // inlined find loop + matches := metaMatchH2(&m.ctrl[g], lo) + for matches != 0 { + s := nextMatch(&matches) + if key == m.groups[g].keys[s] { + value, ok = m.groups[g].values[s], true + return + } + } + // |key| is not in group |g|, + // stop probing if we see an empty slot + matches = metaMatchEmpty(&m.ctrl[g]) + if matches != 0 { + ok = false + return + } + g += 1 // linear probing + if g >= uint32(len(m.groups)) { + g = 0 + } + } +} + +// Put attempts to insert |key| and |value| +func (m *Map[K, V]) Put(key K, value V) { + if m.resident >= m.limit { + m.rehash(m.nextSize()) + } + hi, lo := splitHash(m.hash.Hash(key)) + g := probeStart(hi, len(m.groups)) + for { // inlined find loop + matches := metaMatchH2(&m.ctrl[g], lo) + for matches != 0 { + s := nextMatch(&matches) + if key == m.groups[g].keys[s] { // update + m.groups[g].keys[s] = key + m.groups[g].values[s] = value + return + } + } + // |key| is not in group |g|, + // stop probing if we see an empty slot + matches = metaMatchEmpty(&m.ctrl[g]) + if matches != 0 { // insert + s := nextMatch(&matches) + m.groups[g].keys[s] = key + m.groups[g].values[s] = value + m.ctrl[g][s] = int8(lo) + m.resident++ + return + } + g += 1 // linear probing + if g >= uint32(len(m.groups)) { + g = 0 + } + } +} + +// Delete attempts to remove |key|, returns true if successful. +func (m *Map[K, V]) Delete(key K) (ok bool) { + hi, lo := splitHash(m.hash.Hash(key)) + g := probeStart(hi, len(m.groups)) + for { + matches := metaMatchH2(&m.ctrl[g], lo) + for matches != 0 { + s := nextMatch(&matches) + if key == m.groups[g].keys[s] { + ok = true + // optimization: if |m.ctrl[g]| contains any empty + // metadata bytes, we can physically delete |key| + // rather than placing a tombstone. 
+ // The observation is that any probes into group |g| + // would already be terminated by the existing empty + // slot, and therefore reclaiming slot |s| will not + // cause premature termination of probes into |g|. + if metaMatchEmpty(&m.ctrl[g]) != 0 { + m.ctrl[g][s] = empty + m.resident-- + } else { + m.ctrl[g][s] = tombstone + m.dead++ + } + var k K + var v V + m.groups[g].keys[s] = k + m.groups[g].values[s] = v + return + } + } + // |key| is not in group |g|, + // stop probing if we see an empty slot + matches = metaMatchEmpty(&m.ctrl[g]) + if matches != 0 { // |key| absent + ok = false + return + } + g += 1 // linear probing + if g >= uint32(len(m.groups)) { + g = 0 + } + } +} + +// Iter iterates the elements of the Map, passing them to the callback. +// It guarantees that any key in the Map will be visited only once, and +// for un-mutated Maps, every key will be visited once. If the Map is +// Mutated during iteration, mutations will be reflected on return from +// Iter, but the set of keys visited by Iter is non-deterministic. +func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) { + // take a consistent view of the table in case + // we rehash during iteration + ctrl, groups := m.ctrl, m.groups + // pick a random starting group + g := randIntN(len(groups)) + for n := 0; n < len(groups); n++ { + for s, c := range ctrl[g] { + if c == empty || c == tombstone { + continue + } + k, v := groups[g].keys[s], groups[g].values[s] + if stop := cb(k, v); stop { + return + } + } + g++ + if g >= uint32(len(groups)) { + g = 0 + } + } +} + +// Clear removes all elements from the Map. +func (m *Map[K, V]) Clear() { + for i, c := range m.ctrl { + for j := range c { + m.ctrl[i][j] = empty + } + } + var k K + var v V + for i := range m.groups { + g := &m.groups[i] + for i := range g.keys { + g.keys[i] = k + g.values[i] = v + } + } + m.resident, m.dead = 0, 0 +} + +// Count returns the number of elements in the Map. 
+func (m *Map[K, V]) Count() int { + return int(m.resident - m.dead) +} + +// Capacity returns the number of additional elements +// that can be added to the Map before resizing. +func (m *Map[K, V]) Capacity() int { + return int(m.limit - m.resident) +} + +// find returns the location of |key| if present, or its insertion location if absent. +// for performance, find is manually inlined into public methods. +func (m *Map[K, V]) find(key K, hi h1, lo h2) (g, s uint32, ok bool) { + g = probeStart(hi, len(m.groups)) + for { + matches := metaMatchH2(&m.ctrl[g], lo) + for matches != 0 { + s = nextMatch(&matches) + if key == m.groups[g].keys[s] { + return g, s, true + } + } + // |key| is not in group |g|, + // stop probing if we see an empty slot + matches = metaMatchEmpty(&m.ctrl[g]) + if matches != 0 { + s = nextMatch(&matches) + return g, s, false + } + g += 1 // linear probing + if g >= uint32(len(m.groups)) { + g = 0 + } + } +} + +func (m *Map[K, V]) nextSize() (n uint32) { + n = uint32(len(m.groups)) * 2 + if m.dead >= (m.resident / 2) { + n = uint32(len(m.groups)) + } + return +} + +func (m *Map[K, V]) rehash(n uint32) { + groups, ctrl := m.groups, m.ctrl + m.groups = make([]group[K, V], n) + m.ctrl = make([]metadata, n) + for i := range m.ctrl { + m.ctrl[i] = newEmptyMetadata() + } + m.hash = maphash.NewSeed(m.hash) + m.limit = n * maxAvgGroupLoad + m.resident, m.dead = 0, 0 + for g := range ctrl { + for s := range ctrl[g] { + c := ctrl[g][s] + if c == empty || c == tombstone { + continue + } + m.Put(groups[g].keys[s], groups[g].values[s]) + } + } +} + +func (m *Map[K, V]) loadFactor() float32 { + slots := float32(len(m.groups) * groupSize) + return float32(m.resident-m.dead) / slots +} + +// numGroups returns the minimum number of groups needed to store |n| elems. 
+func numGroups(n uint32) (groups uint32) { + groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad + if groups == 0 { + groups = 1 + } + return +} + +func newEmptyMetadata() (meta metadata) { + for i := range meta { + meta[i] = empty + } + return +} + +func splitHash(h uint64) (h1, h2) { + return h1((h & h1Mask) >> 7), h2(h & h2Mask) +} + +func probeStart(hi h1, groups int) uint32 { + return fastModN(uint32(hi), uint32(groups)) +} + +// lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ +func fastModN(x, n uint32) uint32 { + return uint32((uint64(x) * uint64(n)) >> 32) +} + +// randIntN returns a random number in the interval [0, n). +func randIntN(n int) uint32 { + return fastModN(fastrand(), uint32(n)) +} diff --git a/vendor/github.com/dolthub/swiss/simd/match.s b/vendor/github.com/dolthub/swiss/simd/match.s new file mode 100644 index 00000000..4ae29e77 --- /dev/null +++ b/vendor/github.com/dolthub/swiss/simd/match.s @@ -0,0 +1,19 @@ +// Code generated by command: go run asm.go -out match.s -stubs match_amd64.go. DO NOT EDIT. + +//go:build amd64 + +#include "textflag.h" + +// func MatchMetadata(metadata *[16]int8, hash int8) uint16 +// Requires: SSE2, SSSE3 +TEXT ·MatchMetadata(SB), NOSPLIT, $0-18 + MOVQ metadata+0(FP), AX + MOVBLSX hash+8(FP), CX + MOVD CX, X0 + PXOR X1, X1 + PSHUFB X1, X0 + MOVOU (AX), X1 + PCMPEQB X1, X0 + PMOVMSKB X0, AX + MOVW AX, ret+16(FP) + RET diff --git a/vendor/github.com/dolthub/swiss/simd/match_amd64.go b/vendor/github.com/dolthub/swiss/simd/match_amd64.go new file mode 100644 index 00000000..538c8e12 --- /dev/null +++ b/vendor/github.com/dolthub/swiss/simd/match_amd64.go @@ -0,0 +1,9 @@ +// Code generated by command: go run asm.go -out match.s -stubs match_amd64.go. DO NOT EDIT. 
+ +//go:build amd64 + +package simd + +// MatchMetadata performs a 16-way probe of |metadata| using SSE instructions +// nb: |metadata| must be an aligned pointer +func MatchMetadata(metadata *[16]int8, hash int8) uint16 diff --git a/vendor/modules.txt b/vendor/modules.txt index 8fb33cf5..b7461958 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -106,6 +106,13 @@ github.com/datarhei/joy4/utils/bits/pio # github.com/davecgh/go-spew v1.1.1 ## explicit github.com/davecgh/go-spew/spew +# github.com/dolthub/maphash v0.1.0 +## explicit; go 1.18 +github.com/dolthub/maphash +# github.com/dolthub/swiss v0.2.1 +## explicit; go 1.18 +github.com/dolthub/swiss +github.com/dolthub/swiss/simd # github.com/dustin/go-humanize v1.0.1 ## explicit; go 1.16 github.com/dustin/go-humanize